Rationale: This script performs differential gene expression (dge) and calculates the upregulated and downregulated genes in lichen samples versus Xanthoria culture samples using edgeR and Sleuth

1. Prepare

Load the libraries

Create counts table from kallisto output which will be used for further dge analysis

counts<-read.delim2("../analysis_and_temp_files/06_meta_mapping/kallisto_mycobiont_only.txt",sep=" ")
metadata<-read.csv2("../analysis_and_temp_files/06_meta_mapping/metadata_shared_with_neha.csv",sep=",")

# Extract locus_tag from target_id (XANPAGTX0501 for nuclear genome and GTX0501mito for mito genome) - OPTIONAL
counts[c("locus_tag","rest")]<-str_split_fixed(counts$target_id,'_',n=2)

# Tabulate the counts
counts_tab<-pivot_wider(counts[c(1,4,6)],names_from=sample,values_from = est_counts)
head(counts_tab)
## # A tibble: 6 × 32
##   target_id   XBC2  XBA1  XSA2_2 XSC1  XSE2  XBE1  XTA2  XSC2  XSA2  XBA2  XBC1 
##   <chr>       <chr> <chr> <chr>  <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 XANPAGTX05… 270   535   593.5  248   335.5 288   484   324.5 429.5 408   324.5
## 2 XANPAGTX05… 194   131   284.5  112   171.5 132   257.5 144   179.5 187   81   
## 3 XANPAGTX05… 191.5 305   264.5  152.5 288   367   332   246.5 187.5 140   367  
## 4 XANPAGTX05… 126   43.5  119.5  60.5  40.5  34.5  87.5  45    73    258.5 43.5 
## 5 XANPAGTX05… 38    223   221    91    38    20    267   98    179   196   32   
## 6 XANPAGTX05… 205   431   487    176   180   86    332   271   320   356   114  
## # ℹ 20 more variables: XTA1 <chr>, XSA1 <chr>, MP_I <chr>, XTC2 <chr>,
## #   XTE2 <chr>, XBE2 <chr>, XMC2 <chr>, MP_II <chr>, S_21XB1 <chr>,
## #   KS21XB1 <chr>, S_21XB3 <chr>, S_42XB1 <chr>, S_42XB2 <chr>, S_42XB3 <chr>,
## #   KS48XB1 <chr>, KS48XB2 <chr>, KS48XB3 <chr>, KS9XB1 <chr>, KS9XB2 <chr>,
## #   KS9XB3 <chr>
write.table(counts_tab,"../analysis_and_temp_files/08_dge_culture_lichen/counts_table.tsv",quote=F,sep="\t",row.names = F) # Final table that will be used for dge calculation

Read the counts table (or Start running the script from this step)

full<-read.delim("../analysis_and_temp_files/08_dge_culture_lichen/counts_table.tsv",row.names=1)
d<-full %>% select(-MP_I,-MP_II) #remove MP_I and MP_II samples (based on previous correlation heatmaps)
colnames(d)
##  [1] "XBC2"    "XBA1"    "XSA2_2"  "XSC1"    "XSE2"    "XBE1"    "XTA2"   
##  [8] "XSC2"    "XSA2"    "XBA2"    "XBC1"    "XTA1"    "XSA1"    "XTC2"   
## [15] "XTE2"    "XBE2"    "XMC2"    "S_21XB1" "KS21XB1" "S_21XB3" "S_42XB1"
## [22] "S_42XB2" "S_42XB3" "KS48XB1" "KS48XB2" "KS48XB3" "KS9XB1"  "KS9XB2" 
## [29] "KS9XB3"
head(d,5:5)
##                         XBC2  XBA1 XSA2_2  XSC1  XSE2  XBE1  XTA2  XSC2  XSA2
## XANPAGTX0501_000001-T1 270.0 535.0  593.5 248.0 335.5 288.0 484.0 324.5 429.5
## XANPAGTX0501_000002-T1 194.0 131.0  284.5 112.0 171.5 132.0 257.5 144.0 179.5
## XANPAGTX0501_000003-T1 191.5 305.0  264.5 152.5 288.0 367.0 332.0 246.5 187.5
## XANPAGTX0501_000004-T1 126.0  43.5  119.5  60.5  40.5  34.5  87.5  45.0  73.0
## XANPAGTX0501_000005-T1  38.0 223.0  221.0  91.0  38.0  20.0 267.0  98.0 179.0
##                         XBA2  XBC1  XTA1 XSA1  XTC2  XTE2  XBE2  XMC2 S_21XB1
## XANPAGTX0501_000001-T1 408.0 324.5 467.5  262 364.5 292.5 164.5 353.0   700.0
## XANPAGTX0501_000002-T1 187.0  81.0 178.5  163 199.0 183.0  99.0 113.0    66.5
## XANPAGTX0501_000003-T1 140.0 367.0 414.5  134 366.5 319.0 164.5 364.5   151.5
## XANPAGTX0501_000004-T1 258.5  43.5  59.5   74  53.0  29.5  47.0  27.5    11.0
## XANPAGTX0501_000005-T1 196.0  32.0 390.0  196 122.0 100.0  13.0  41.0    22.0
##                        KS21XB1 S_21XB3 S_42XB1 S_42XB2 S_42XB3 KS48XB1 KS48XB2
## XANPAGTX0501_000001-T1   302.5   716.0   452.5   333.0   356.5   454.5   462.5
## XANPAGTX0501_000002-T1   121.5    48.5   126.5    67.0    73.0   221.0   176.5
## XANPAGTX0501_000003-T1   216.0   142.0   206.5   202.5   217.5   197.5   214.0
## XANPAGTX0501_000004-T1   212.0    43.5    68.5    97.5    79.5    43.5    45.5
## XANPAGTX0501_000005-T1    17.0    22.0    27.0    19.0    56.0    16.0    17.0
##                        KS48XB3 KS9XB1 KS9XB2 KS9XB3
## XANPAGTX0501_000001-T1   464.0  376.0  297.0  323.5
## XANPAGTX0501_000002-T1   236.5   98.0   90.0  106.0
## XANPAGTX0501_000003-T1   209.0  313.5  216.5  268.5
## XANPAGTX0501_000004-T1    37.5   91.5   36.0   78.5
## XANPAGTX0501_000005-T1     6.0   24.0   21.0   21.0

Remove lowly expressed genes

# create new column, "max" for the max read count in each row
d$max<-apply(d,1,max)
min(d$max)
## [1] 0
d1<-d[which(d$max>=5),] #to remove lowly expressed genes, keep rows in which max read count across whole panel is atleast 5

# drop the max column, as its not needed anymore
raw_counts<-d1 %>% select(-max)
length(rownames(d1)) # total number of genes
## [1] 11105
length(rownames(raw_counts)) # number after removal of lowly expressed genes
## [1] 11105

2. Generate DGE list with Sleuth

library(sleuth)
kal_dirs <- data.frame("sample"=metadata$run_id, "condition"=metadata$sample_type,
                       "path"=paste0("../analysis_and_temp_files/06_meta_mapping/kallisto_meta_mapping/",metadata$run_id,"_kallisto")) %>% filter(sample!="MP_II" & sample!="MP_I")

#define list of genes to be included: only mycobiont and only those that have  read count across whole panel of >=5
target_id<-rownames(d1)
so <- sleuth_prep(kal_dirs, extra_bootstrap_summary = TRUE,filter_target_id = target_id,transformation_function = function(x) log2(x + 0.1))
## Warning in check_num_cores(num_cores): It appears that you are running Sleuth from within Rstudio.
## Because of concerns with forking processes from a GUI, 'num_cores' is being set to 1.
## If you wish to take advantage of multiple cores, please consider running sleuth from the command line.
## reading in kallisto results
## dropping unused factor levels
## .............................
## normalizing est_counts
## A list of target IDs for filtering was found. Using this for filtering
## 11105 targets passed the filter
## normalizing tpm
## merging in metadata
## summarizing bootstraps
## .............................
so <- sleuth_fit(so, ~condition, 'full')
## fitting measurement error models
## shrinkage estimation
## 16 NA values were found during variance shrinkage estimation due to mean observation values outside of the range used for the LOESS fit.
## The LOESS fit will be repeated using exact computation of the fitted surface to extrapolate the missing values.
## These are the target ids with NA values: XANPAGTX0501_000408-T1, XANPAGTX0501_000967-T1, XANPAGTX0501_000968-T1, XANPAGTX0501_000969-T1, XANPAGTX0501_000980-T1, XANPAGTX0501_000995-T1, XANPAGTX0501_002621-T1, XANPAGTX0501_002938-T1, XANPAGTX0501_004237-T1, XANPAGTX0501_005100-T1, XANPAGTX0501_008798-T1, XANPAGTX0501_009179-T1, XANPAGTX0501_010285-T1, XANPAGTX0501_010514-T1, XANPAGTX0501_010642-T1, XANPAGTX0501_010694-T1
## computing variance of betas
so <- sleuth_fit(so, ~1, 'reduced')
## fitting measurement error models
## shrinkage estimation
## 16 NA values were found during variance shrinkage estimation due to mean observation values outside of the range used for the LOESS fit.
## The LOESS fit will be repeated using exact computation of the fitted surface to extrapolate the missing values.
## These are the target ids with NA values: XANPAGTX0501_000408-T1, XANPAGTX0501_000967-T1, XANPAGTX0501_000968-T1, XANPAGTX0501_000969-T1, XANPAGTX0501_000980-T1, XANPAGTX0501_000995-T1, XANPAGTX0501_002621-T1, XANPAGTX0501_002938-T1, XANPAGTX0501_004237-T1, XANPAGTX0501_005100-T1, XANPAGTX0501_008798-T1, XANPAGTX0501_009179-T1, XANPAGTX0501_010285-T1, XANPAGTX0501_010514-T1, XANPAGTX0501_010642-T1, XANPAGTX0501_010694-T1
## computing variance of betas
so <- sleuth_lrt(so, 'reduced', 'full')
so <- sleuth_wt(so, 'conditionxanthoria_culture')
plot_pca(so, color_by = 'condition', text_labels = TRUE,use_filtered=T,units="tpm")

sleuth_table <- sleuth_results(so, 'conditionxanthoria_culture')
sig <- sleuth_table %>% tibble::as_tibble() %>% 
  filter( qval <= 0.05, grepl("GTX0501",target_id) ) %>% 
  arrange(desc(b)) %>% mutate(change = if_else(b > 1, "culture", ifelse(b< -1, "lichen", "low_logFC")))

table(sig$change)
## 
##   culture    lichen low_logFC 
##       564      1185      4124
pca<-plot_pca(so, color_by = 'condition', text_labels = F,use_filtered=T,units="tpm")
pca+theme_bw()+theme(text=element_text(size=7))

ggsave('../results/lichen_culture_pca.pdf',width = 4.5, height = 3)
plot_pc_variance(so,use_filtered=T,units="tpm")

ppv <- plot_pc_variance(so,use_filtered=T,units="tpm")
PCpc <- ppv$data$var
names(PCpc) <- paste0("PC", seq(1:length(PCpc)))
PCpc
##       PC1       PC2       PC3       PC4       PC5 
## 58.442702 23.237991  6.167265  3.743065  2.602711

2.3. Save gene lists

funannot<-read.delim2("../../02_mycobiont_genome/analysis_and_temp_files/06_annotate_lecanoro/Annotation_with_OG.txt",sep="\t")


sig2<-sig %>% left_join(funannot, by=c("target_id"="TranscriptID"))

upreg_sl=sig2 %>% filter(change=="lichen") %>% select(-c(CDS.transcript,Translation,gDNA,mRNA))
downreg_sl=sig2 %>% filter(change=="culture")%>% select(-c(CDS.transcript,Translation,gDNA,mRNA))

write.table(upreg_sl, "../analysis_and_temp_files/08_dge_culture_lichen/upreg_in_lichen_sleuth.txt", append="false", sep = "\t", quote=F, row.names = F)
write.table(downreg_sl, "../analysis_and_temp_files/08_dge_culture_lichen/upreg_in_culture_sleuth.txt", append="false", sep = "\t", quote=F, row.names = F)

tabd_df <- so$obs_norm[so$obs_norm$target_id %in% target_id,]
tabd_df <- dplyr::select(tabd_df, target_id, sample, 
            tpm)
tabd_df <- reshape2::dcast(tabd_df, target_id ~ sample, 
            value.var = "tpm") 
write.table(tabd_df, "../analysis_and_temp_files/08_dge_culture_lichen/norm_counts_sleuth.txt", append="false", sep = "\t", quote=F, row.names = F)

2.4. bootstrap of several genes

  • Picked the gene with maximal b-value (log2 fold changes between conditions). Here, b-value > 0 means overrexpression in culture
plot_bootstrap(so, 
               target_id = sig$target_id[which.max(sig$b)], 
               units = "est_counts", 
               color_by = "condition")

  • Picked the gene with minimal b-value, i.e. most strongly overrexpressed in lichen
plot_bootstrap(so, 
               target_id = sig$target_id[which.min(sig$b)], 
               units = "est_counts", 
               color_by = "condition")

  • Picked the gene with minimal absolute b-value, i.e. with the lowest fold change difference. The b for it is sig$b[which.min(abs(sig$b))] and the q-value (p-value adjusted for multiple test correction) is sig$qval[which.min(abs(sig$b))], but you can see the difference between sample types somewhat
plot_bootstrap(so, 
               target_id = sig$target_id[which.min(abs(sig$b))], 
               units = "est_counts", 
               color_by = "condition")

2.5. GO enrichment analysis on sleuth

###make table with GO annotations
go_df <-funannot %>% select(TranscriptID,GO.Terms_new) %>% 
  mutate(GO.Terms_new = strsplit(GO.Terms_new, ",")) %>%
        unnest(GO.Terms_new) %>%
  mutate(GO.Terms=sub(".*? ", "", GO.Terms_new),
         short_term = substr(GO.Terms, 1,40))

go_data <- list(
    term2protein = data.frame(
                        term = go_df$GO.Terms_new,
                        gene = go_df$TranscriptID
                        ),
    term2name = data.frame(
                        term = go_df$GO.Terms_new,
                        name = go_df$short_term
                        ),
    
    universe = unique(as.character(go_df$TranscriptID))
)


### select genes upregulated in lichens and order them by fold change
geneList5 = sig$b[sig$change=="lichen"]
names(geneList5) = sig$target_id[sig$change=="lichen"]
geneList5 = sort(geneList5, decreasing = F)
geneList5 <- names(geneList5)

###enrichment analysis
enrich5<-clusterProfiler::enricher(geneList5,
    pAdjustMethod = "none",
    minGSSize = 1,
    maxGSSize = 2000,
    qvalueCutoff = 1,
    universe=go_data$universe,
    TERM2GENE=go_data$term2protein,
    TERM2NAME=go_data$term2name)
## 
## Registered S3 methods overwritten by 'treeio':
##   method              from    
##   MRCA.phylo          tidytree
##   MRCA.treedata       tidytree
##   Nnode.treedata      tidytree
##   Ntip.treedata       tidytree
##   ancestor.phylo      tidytree
##   ancestor.treedata   tidytree
##   child.phylo         tidytree
##   child.treedata      tidytree
##   full_join.phylo     tidytree
##   full_join.treedata  tidytree
##   groupClade.phylo    tidytree
##   groupClade.treedata tidytree
##   groupOTU.phylo      tidytree
##   groupOTU.treedata   tidytree
##   is.rooted.treedata  tidytree
##   nodeid.phylo        tidytree
##   nodeid.treedata     tidytree
##   nodelab.phylo       tidytree
##   nodelab.treedata    tidytree
##   offspring.phylo     tidytree
##   offspring.treedata  tidytree
##   parent.phylo        tidytree
##   parent.treedata     tidytree
##   root.treedata       tidytree
##   rootnode.phylo      tidytree
##   sibling.phylo       tidytree
## Registered S3 method overwritten by 'ggtree':
##   method      from 
##   identify.gg ggfun
enrichplot::dotplot(enrich5,showCategory=40,label_format=40)

enrich5_pairwise<-enrichplot::pairwise_termsim(enrich5)
enrichplot::emapplot(enrich5_pairwise)

### select genes upregulated in lichens and order them by fold change
geneList6 = sig$b[sig$change=="culture"]
names(geneList6) = sig$target_id[sig$change=="culture"]
geneList6 = sort(geneList6, decreasing = T)
geneList6 <- names(geneList6)

###enrichment analysis
enrich6<-clusterProfiler::enricher(geneList6,
    pAdjustMethod = "none",
    minGSSize = 1,
    maxGSSize = 2000,
    qvalueCutoff = 1,
    universe=go_data$universe,
    TERM2GENE=go_data$term2protein,
    TERM2NAME=go_data$term2name)

enrichplot::dotplot(enrich6,showCategory=40,label_format=40)

enrich6_pairwise<-enrichplot::pairwise_termsim(enrich6)
enrichplot::emapplot(enrich6_pairwise)

2.6. InterPro enrichment analysis on sleuth

###make table with IPS annotations
ips_df <-funannot %>% select(TranscriptID,InterPro_new) %>% 
  mutate(InterPro_new = strsplit(InterPro_new, ", I")) %>%
        unnest(InterPro_new) %>% mutate(InterPro_new=str_replace(InterPro_new,"^PR","IPR")) %>%
  mutate(short_term = substr(InterPro_new, 1,40))


ips_data <- list(
    term2protein = data.frame(
                        term = ips_df$InterPro_new,
                        gene = ips_df$TranscriptID
                        ),
    term2name = data.frame(
                        term = ips_df$InterPro_new,
                        name = ips_df$short_term
                        ),
    
    universe = unique(as.character(ips_df$TranscriptID))
)

enrich7<-clusterProfiler::enricher(geneList5,
    pAdjustMethod = "none",
    minGSSize = 1,
    maxGSSize = 2000,
    qvalueCutoff = 1,
    universe=ips_data$universe,
    TERM2GENE=ips_data$term2protein,
    TERM2NAME=ips_data$term2name)

enrichplot::dotplot(enrich7,showCategory=40,label_format=40)

* Upregulated in lichen as a graph

enrich7_pairwise<-enrichplot::pairwise_termsim(enrich7)
enrichplot::emapplot(enrich7_pairwise)

pdf(file="../results/lichen_upr.pdf",width=5,height=4)
enrichplot::emapplot(enrich7_pairwise,cex_label_category=0.4,cex_line=0.25,
                     shadowtext=F,cex_pie2axis=0.1,repel=T)
dev.off()
## quartz_off_screen 
##                 2
enrich8<-clusterProfiler::enricher(geneList6,
    pAdjustMethod = "none",
    minGSSize = 1,
    maxGSSize = 2000,
    qvalueCutoff = 1,
    universe=ips_data$universe,
    TERM2GENE=ips_data$term2protein,
    TERM2NAME=ips_data$term2name)

enrichplot::dotplot(enrich8,showCategory=40,label_format=40)

* Upregulated in culture as a graph

enrich8_pairwise<-enrichplot::pairwise_termsim(enrich8)
enrichplot::emapplot(enrich8_pairwise)

pdf(file="../results/culture_upr.pdf",width=5,height=4)
enrichplot::emapplot(enrich8_pairwise,cex_label_category=0.4,cex_line=0.25,
                     shadowtext=F,cex_pie2axis=0.1,repel=T)
dev.off()
## quartz_off_screen 
##                 2

2.7. These enrichment plots need to be taken with a grain of salt

perc<-funannot %>% mutate(go_annotated=ifelse(!(is.na(GO.Terms_new)),"Annotated","Non-annotated"),
         ipr_annotated=ifelse(!(is.na(InterPro_new)),"Annotated","Non-annotated"),
         gene_set=case_when(
           TranscriptID %in% sig$target_id[sig$change=="culture"] ~ "Upregulated in culture",
          TranscriptID %in% sig$target_id[sig$change=="lichen"] ~ "Upregulated in lichen",
          T ~ "non-DGE"))

ipr_perc_dge<- perc %>% group_by(gene_set,ipr_annotated) %>% summarize(n=n()) %>% mutate(database="InterPro",annotated=ipr_annotated) %>% select(-ipr_annotated)
## `summarise()` has grouped output by 'gene_set'. You can override using the
## `.groups` argument.
go_perc_dge <- perc %>% group_by(gene_set,go_annotated) %>% summarize(n=n()) %>% mutate(database="GO",annotated=go_annotated) %>% select(-go_annotated)
## `summarise()` has grouped output by 'gene_set'. You can override using the
## `.groups` argument.
ipr_perc_all<- perc %>% group_by(ipr_annotated) %>% summarize(n=n()) %>% mutate(database="InterPro",gene_set="Whole transcriptome",annotated=ipr_annotated) %>% select(-ipr_annotated)
go_perc_all <- perc %>% group_by(go_annotated) %>% summarize(n=n()) %>% mutate(database="GO",gene_set="Whole transcriptome",annotated=go_annotated) %>% select(-go_annotated)
perc2<-rbind(ipr_perc_dge,go_perc_dge,ipr_perc_all,go_perc_all)
perc2$annotated<-factor(perc2$annotated,levels=c("Non-annotated","Annotated"))

ggplot(perc2 %>% filter(gene_set %in% c("Upregulated in culture","Upregulated in lichen")),
       aes(x=database,y=n,fill=annotated))+geom_bar(position="stack",stat="identity")+
  facet_wrap(~gene_set)

* Instead of two datatbases, visualize the number of proteins with any kind of functional annotation * NB: counted tRNA as being annotated

#make bar graph for DGE
perc<-funannot %>% mutate(annotated=ifelse(!(is.na(GO.Terms_new))|                                       !(is.na(CAZyme_new))| !(is.na(PFAM_new))|
                !(is.na(Protease_new))|!(is.na(InterPro_new))| KO!=""|Feature=="tRNA",
                "Annotated","Non-annotated"),
         gene_set=case_when(
           TranscriptID %in% sig$target_id[sig$change=="culture"] ~ "Upregulated in culture",
          TranscriptID %in% sig$target_id[sig$change=="lichen"] ~ "Upregulated in lichen",
          T ~ "non-DGE"))

perc_dge<- perc %>% group_by(gene_set,annotated) %>% summarize(n=n()) %>%
  group_by(gene_set) %>% mutate(perc=(100*n)/sum(n))
## `summarise()` has grouped output by 'gene_set'. You can override using the
## `.groups` argument.
dge_plot<-ggplot(perc_dge %>% filter(gene_set %in% c("Upregulated in culture","Upregulated in lichen")),
       aes(x=gene_set,y=n,fill=annotated))+geom_bar(position="stack",stat="identity")+
  geom_text(aes(label = paste0(round(perc,0),"%"),color=annotated), position=position_stack(vjust = 0.5),size=7)+
  xlab("")+ylab("# of transcripts")+ggtitle("Differentially expressed")+
  scale_fill_manual(values = c("Non-annotated"="#ffda73","Annotated"="#156902"))+
  scale_color_manual(values = c("Non-annotated"="#156902","Annotated"="#ffda73"))+ theme_minimal()+
  theme(legend.title=element_blank(),legend.position = c(0.2, 0.8),
  legend.background = element_rect(fill="white",
                                 linewidth=0.5, linetype="solid", 
                                  colour ="grey"),
  panel.grid.major.x = element_blank(),
  text = element_text(size=12),
  axis.text=element_text(size=8))

#donut plot for the whole genome
perc_all <- perc %>% group_by(annotated) %>% summarize(n=n()) %>% mutate(perc=(100*n)/sum(n))
perc_all$ymax <- cumsum(perc_all$perc)
# Compute the bottom of each rectangle
perc_all$ymin <- c(0, head(perc_all$ymax, n=-1))
# Compute label position
perc_all$labelPosition <- (perc_all$ymax + perc_all$ymin) / 2
# Compute a good label
perc_all$label <- paste0(round(perc_all$perc,0), "%")
# Make the plot
all_plot<-ggplot(perc_all, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=annotated)) +
  geom_rect() +
  geom_text( x=3.5, aes(y=labelPosition, label=label,color=annotated), size=8) +
  scale_fill_manual(values = c("Non-annotated"="#ffda73","Annotated"="#156902"))+
  scale_color_manual(values = c("Non-annotated"="#156902","Annotated"="#ffda73"))+
  coord_polar(theta="y") + ggtitle("Whole transcriptome (n = 11,185)")+
  xlim(c(2, 4)) +
  theme_void() +
  theme(legend.position = "none",text = element_text(size=12))

library(patchwork)
dge_plot+all_plot

all_plot<-ggplot(perc_all, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=annotated)) +
  geom_rect() +
  geom_text( x=3.5, aes(y=labelPosition, label=label,color=annotated), size=2.75) +
  scale_fill_manual(values = c("Non-annotated"="#ffda73","Annotated"="#156902"))+
  scale_color_manual(values = c("Non-annotated"="#156902","Annotated"="#ffda73"))+
  coord_polar(theta="y") + ggtitle("Whole transcriptome (n = 11,185)")+
  xlim(c(2, 4)) +
  theme_void() +
  theme(legend.position = "none",text = element_text(size=8),plot.title = element_text(size=8))
dge_plot<-ggplot(perc_dge %>% filter(gene_set %in% c("Upregulated in culture","Upregulated in lichen")),
       aes(x=gene_set,y=n,fill=annotated))+geom_bar(position="stack",stat="identity")+
  geom_text(aes(label = paste0(round(perc,0),"%"),color=annotated), position=position_stack(vjust = 0.5),size=2.75)+
  xlab("")+ylab("# of transcripts")+ggtitle("Differentially expressed")+
  scale_fill_manual(values = c("Non-annotated"="#ffda73","Annotated"="#156902"))+
  scale_color_manual(values = c("Non-annotated"="#156902","Annotated"="#ffda73"))+ theme_minimal()+
  theme(legend.title=element_blank(),legend.position = c(0.2, 0.8),
  legend.background = element_rect(fill="white",
                                 linewidth=0.5, linetype="solid", 
                                  colour ="grey"),
  panel.grid.major.x = element_blank(),
  text = element_text(size=8),
  axis.text=element_text(size=6),plot.title = element_text(size=8))

pdf(file="../results/annotated_percent.pdf",width=4.5,height=3.5)
dge_plot+all_plot
dev.off()
## quartz_off_screen 
##                 2

3. Create lists for unique genes

Identified 13 genes expressed in all lichen samples and none of the culture samples.

  • This list doesn’t include any potential effectors
lichen_unique<-counts_tab %>% filter(XBC2>0,XBA1>0,XSA2_2>0,XSC1>0,XSE2>0,XBE1>0,XTA2>0,XSC2>0,
  XSA2>0,XBA2>0,XBC1>0,XTA1>0,XSA1>0,XTC2>0,XTE2>0,XBE2>0,XMC2>0,
  S_21XB1==0,KS21XB1==0,S_21XB3==0,S_42XB1==0,S_42XB2==0,S_42XB3==0,
  KS48XB1==0,KS48XB2==0,KS48XB3==0,KS9XB1==0,KS9XB2==0,KS9XB3==0) %>%
  pivot_longer(-target_id,names_to="sample",values_to="counts") %>%
  filter(counts>0, sample !="MP_I",sample !="MP_II") %>% group_by(target_id) %>% summarise(avg_counts_lichen=mean(as.numeric(counts))) %>%  
  left_join(funannot, by=c("target_id"="TranscriptID")) %>%
  select(-c(CDS.transcript,Translation,gDNA,mRNA))
  
lichen_unique %>%
  kable(format = "html", col.names = colnames(lichen_unique)) %>%
  kable_styling() %>%
  kableExtra::scroll_box(width = "100%", height = "600px")
target_id avg_counts_lichen GeneID Feature Contig Start Stop Strand Name Product Alias.Synonyms EC_number BUSCO PFAM InterPro EggNog COG GO.Terms Secreted Membrane Protease CAZyme antiSMASH Notes ProtID CAZyme_new COG_new EggNog_new GO.Terms_new InterPro_new PFAM_new Protease_new Secreted_new KO
XANPAGTX0501_001612-T1 46.99245 XANPAGTX0501_001612 mRNA Xp_GTX0501_2 2002693 2003141
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_001612_T1_XANPAGTX0501_001612 NA NA NA NA NA NA NA NA
XANPAGTX0501_002783-T1 18.68803 XANPAGTX0501_002783 mRNA Xp_GTX0501_4 808460 808865
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_002783_T1_XANPAGTX0501_002783 NA NA NA NA NA NA NA NA
XANPAGTX0501_003199-T1 20.17647 XANPAGTX0501_003199 mRNA Xp_GTX0501_5 87909 89177
AIM6_3 Altered inheritance of mitochondria protein 6 NA NA NA NA IPR017946 PLC-like phosphodiesterase, TIM beta/alpha-barrel domain superfamily;IPR039559 Altered inheritance of mitochondria protein 6, PI-PLC-like catalytic domain ENOG503NZFJ S:(S) Function unknown GO_function: GO:0008081 - phosphoric diester hydrolase activity [Evidence IEA];GO_process: GO:0006629 - lipid metabolic process [Evidence IEA] NA NA NA NA Cluster_2 NA XANPAGTX0501_003199_T1_XANPAGTX0501_003199 NA S:(S) Function unknown ENOG503NZFJ GO_function: GO:0008081 - phosphoric diester hydrolase activity [Evidence IEA], GO_process: GO:0006629 - lipid metabolic process [Evidence IEA] IPR017946 PLC-like phosphodiesterase, TIM beta/alpha-barrel domain superfamily, IPR039559 Altered inheritance of mitochondria protein 6, PI-PLC-like catalytic domain NA NA NA
XANPAGTX0501_006970-T1 48.17647 XANPAGTX0501_006970 mRNA Xp_GTX0501_11 354394 354648
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_006970_T1_XANPAGTX0501_006970 NA NA NA NA NA NA NA NA
XANPAGTX0501_006972-T1 74.64706 XANPAGTX0501_006972 mRNA Xp_GTX0501_11 357523 357828
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_006972_T1_XANPAGTX0501_006972 NA NA NA NA NA NA NA NA
XANPAGTX0501_008008-T1 58.88235 XANPAGTX0501_008008 mRNA Xp_GTX0501_14 138465 139439
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_008008_T1_XANPAGTX0501_008008 NA NA NA NA NA NA NA NA
XANPAGTX0501_008856-T1 94.82353 XANPAGTX0501_008856 mRNA Xp_GTX0501_17 399345 400886
NA hypothetical protein NA NA NA NA IPR032675 Leucine-rich repeat domain superfamily NA NA NA NA NA NA NA Cluster_1 NA XANPAGTX0501_008856_T1_XANPAGTX0501_008856 NA NA ENOG503P83U GO_function: GO:0005515 - protein binding [Evidence IEA] IPR001810 F-box domain, IPR032675 Leucine-rich repeat domain superfamily NA NA NA
XANPAGTX0501_009040-T1 57.41176 XANPAGTX0501_009040 mRNA Xp_GTX0501_18 366441 366695
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_009040_T1_XANPAGTX0501_009040 NA NA NA NA NA NA NA NA
XANPAGTX0501_009438-T1 963.88235 XANPAGTX0501_009438 mRNA Xp_GTX0501_21 196714 197521
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_009438_T1_XANPAGTX0501_009438 NA NA NA NA NA NA NA NA
XANPAGTX0501_009555-T1 1952.88235 XANPAGTX0501_009555 mRNA Xp_GTX0501_22 191143 191546
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_009555_T1_XANPAGTX0501_009555 NA NA NA NA NA NA NA NA
XANPAGTX0501_009582-T1 17.00000 XANPAGTX0501_009582 mRNA Xp_GTX0501_22 265438 265938
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_009582_T1_XANPAGTX0501_009582 NA NA NA NA NA NA NA NA
XANPAGTX0501_009957-T1 69.00000 XANPAGTX0501_009957 mRNA Xp_GTX0501_27 408 679
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA Cluster_1 NA XANPAGTX0501_009957_T1_XANPAGTX0501_009957 NA NA NA NA NA NA NA NA
XANPAGTX0501_010684-T1 45.11765 XANPAGTX0501_010684 mRNA Xp_GTX0501_46 20975 22156
NA hypothetical protein NA NA NA PF00067 IPR001128 Cytochrome P450;IPR002401 Cytochrome P450, E-class, group I;IPR017972 Cytochrome P450, conserved site;IPR036396 Cytochrome P450 superfamily ENOG503NWJC Q:(Q) Secondary metabolites biosynthesis, transport and catabolism GO_function: GO:0016705 - oxidoreductase activity, acting on paired donors, with incorporation or reduction of molecular oxygen [Evidence IEA];GO_function: GO:0020037 - heme binding [Evidence IEA];GO_function: GO:0005506 - iron ion binding [Evidence IEA];GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] NA NA NA NA NA NA XANPAGTX0501_010684_T1_XANPAGTX0501_010684 NA Q:(Q) Secondary metabolites biosynthesis, transport and catabolism ENOG503NWJC GO_function: GO:0005506 - iron ion binding [Evidence IEA], GO_function: GO:0016705 - oxidoreductase activity, acting on paired donors, with incorporation or reduction of molecular oxygen [Evidence IEA], GO_function: GO:0020037 - heme binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR001128 Cytochrome P450, IPR002401 Cytochrome P450, E-class, group I, IPR017972 Cytochrome P450, conserved site, IPR036396 Cytochrome P450 superfamily PF00067 NA NA
  • Most of these genes lack any annotation. There are three exceptions
  • Two are unclear:
    • A protein assigned to IPR039559 Altered inheritance of mitochondria protein 6, PI-PLC-like catalytic domain. Function is unclear
    • A protein assigned to IPR001128 Cytochrome P450
  • XANPAGTX0501_008856-T1 is interesting
    • Has a F-box domain and a LRR (leucine-rich repeat) domain
    • This review of fungal F-box proteins says that such proteins
  • act as scavengers in the cell, collecting “junk” proteins to deliver to a “waste processor,” called the SCF complex, to which they dock through their F-box domain. In the SCF complex, the junk proteins are marked with ubiquitin for “incineration” in the proteasome. F-box proteins do not act indiscriminately but recruit specific, often modified proteins to the SCF complex and in this way regulate the level of certain proteins in a cell. F-box proteins are found in all eukaryotes and display a large variety of functions. In fungi they are, for example, involved in control of the cell division cycle, glucose sensing, mitochondrial connectivity, and control of the circadian clock

    • XANPAGTX0501_008856-T1 is from a lichen-enriched orthogroup
    • Expression profile
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_008856-T1", 
                   units = "est_counts", 
                   color_by = "condition")

    Identified 1 gene that was highly expressed in all culture samples but not at all in the lichens

    culture_unique<-counts_tab %>% filter(XBC2==0,XBA1==0,XSA2_2==0,XSC1==0,XSE2==0,XBE1==0,XTA2==0,XSC2==0,
      XSA2==0,XBA2==0,XBC1==0,XTA1==0,XSA1==0,XTC2==0,XTE2==0,XBE2==0,XMC2==0,
      S_21XB1>0,KS21XB1>0,S_21XB3>0,S_42XB1>0,S_42XB2>0,S_42XB3>0,
      KS48XB1>0,KS48XB2>0,KS48XB3>0,KS9XB1>0,KS9XB2>0,KS9XB3>0) %>% pivot_longer(-target_id,names_to="sample",values_to="counts") %>%
      filter(counts>0, sample !="MP_I",sample !="MP_II") %>% group_by(target_id) %>% summarise(avg_counts_culture=mean(as.numeric(counts))) %>%  
      left_join(funannot, by=c("target_id"="TranscriptID")) %>%
      select(-c(CDS.transcript,Translation,gDNA,mRNA))
    
    culture_unique %>%
      kable(format = "html", col.names = colnames(culture_unique)) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    target_id avg_counts_culture GeneID Feature Contig Start Stop Strand Name Product Alias.Synonyms EC_number BUSCO PFAM InterPro EggNog COG GO.Terms Secreted Membrane Protease CAZyme antiSMASH Notes ProtID CAZyme_new COG_new EggNog_new GO.Terms_new InterPro_new PFAM_new Protease_new Secreted_new KO
    XANPAGTX0501_008359-T2 967.2169 XANPAGTX0501_008359 mRNA Xp_GTX0501_15 271800 273051
    NA hypothetical protein NA NA NA PF03881 IPR011009 Protein kinase-like domain superfamily;IPR016477 Fructosamine/Ketosamine-3-kinase ENOG503P2EP G:(G) Carbohydrate transport and metabolism NA NA NA NA NA NA NA XANPAGTX0501_008359_T2_XANPAGTX0501_008359 NA G:(G) Carbohydrate transport and metabolism ENOG503NZHR, ENOG503P2EP NA IPR011009 Protein kinase-like domain superfamily, IPR016477 Fructosamine/Ketosamine-3-kinase PF03881 NA NA
  • According to InterPro, this protein is linked to protein repair
  • 4. Investigate selected genes

    4.1. Both gene models from the MAT-1-2-1 are upregulated in lichens

    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_002103-T1", 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_002104-T1", 
                   units = "est_counts", 
                   color_by = "condition")

    4.2. G-alpha proteins

    • Of six proteins identified as G-alpha, none are in the DGE list. 4 have qval <0.05, but their b values are all between 0 and 1
    galpha<-funannot %>% filter(grepl("IPR001019",InterPro_new))
    
    sig %>% filter(target_id %in% galpha$TranscriptID)
    ## # A tibble: 5 × 12
    ##   target_id        pval     qval     b   se_b mean_obs var_obs tech_var sigma_sq
    ##   <chr>           <dbl>    <dbl> <dbl>  <dbl>    <dbl>   <dbl>    <dbl>    <dbl>
    ## 1 XANPAGTX050… 2.19e-14 3.18e-13 0.625 0.0819     8.08  0.130  0.000966   0.0323
    ## 2 XANPAGTX050… 6.74e- 9 4.96e- 8 0.582 0.100      7.59  0.153  0.000548   0.0702
    ## 3 XANPAGTX050… 4.12e- 4 1.26e- 3 0.491 0.139      7.97  0.192  0.00126    0.135 
    ## 4 XANPAGTX050… 2.57e- 3 6.48e- 3 0.233 0.0771     7.46  0.0288 0.000600   0.0152
    ## 5 XANPAGTX050… 2.58e- 2 4.90e- 2 0.210 0.0941     7.16  0.0712 0.000799   0.0615
    ## # ℹ 3 more variables: smooth_sigma_sq <dbl>, final_sigma_sq <dbl>, change <chr>

    4.4. Genes from lichen-enriched orthogroups

    • The list of lichen-enriched orthogroups, see ../02_mycobiont_genome/notebook/09_lichen_specific_genes.html
    • Heatmap showing all 1226 lichen-specific genes, regardles of DGE status
    funannot2<-read.delim2("../../02_mycobiont_genome/analysis_and_temp_files/09_ortho/lichen_enriched_ortho_in_xanpa.tsv",sep="\t")
    png("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_lichen_enriched_all.png",res=300,width=2000,height=1500)
    plot_transcript_heatmap(so, transcripts=funannot2$TranscriptID[funannot2$lichen_ortho==T],
                            cluster_transcripts=T,show_rownames=F,
                            clustering_callback = callback)
    dev.off()
    ## pdf 
    ##   4
    knitr::include_graphics("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_lichen_enriched_all.png") 

    • Of them, 361 are differentially expressed. This is 29.5% (compared to 15.6370139% across the genome)
    og_sig<-sig %>% filter(target_id %in% funannot2$TranscriptID[funannot2$lichen_ortho==T])
    og_sig %>% group_by(change) %>% summarise(n=n())
    ## # A tibble: 3 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture      97
    ## 2 lichen      264
    ## 3 low_logFC   374
    • Heatmap with only DGE genes. Lichen samples are split based on substrates
    png("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_lichen_enriched_dge.png",res=300,width=2000,height=1000)
    plot_transcript_heatmap(so, transcripts=og_sig$target_id[og_sig$change!="low_logFC"],
                            cluster_transcripts=T,show_rownames=F,
                            clustering_callback = callback)
    dev.off()
    ## pdf 
    ##   4
    knitr::include_graphics("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_lichen_enriched_dge.png") 

    • Of 361 DGE lichen-specific genes, 168 have no functional annotation. This is 47%, compared to 0.3081806) across the whole genome

    4.5. Secondary metabolism clusters

    • How many genes are in the genome in total that are annotated as part of a SM gene cluster? 1239 in 59 clusters
      • NB: this is according to funannotate indexing. The number of regions displayed in the antiSMASH output is 49, since funannotate have split some of the ‘mixed’ clusters in to several)
      • Funannotate indexing here is misleading, because it didn’t handle correctly cases in which one gene is assigned to different clusters. I manually compiled a table instead
    funannot2 %>% group_by(antiSMASH,Contig) %>% summarize(n=n()) %>% filter(!is.na(antiSMASH))
    ## `summarise()` has grouped output by 'antiSMASH'. You can override using the
    ## `.groups` argument.
    ## # A tibble: 59 × 3
    ## # Groups:   antiSMASH [7]
    ##    antiSMASH Contig            n
    ##    <chr>     <chr>         <int>
    ##  1 Cluster_1 Xp_GTX0501_1     39
    ##  2 Cluster_1 Xp_GTX0501_10    20
    ##  3 Cluster_1 Xp_GTX0501_11    15
    ##  4 Cluster_1 Xp_GTX0501_12    12
    ##  5 Cluster_1 Xp_GTX0501_13    24
    ##  6 Cluster_1 Xp_GTX0501_14    11
    ##  7 Cluster_1 Xp_GTX0501_15    18
    ##  8 Cluster_1 Xp_GTX0501_17    24
    ##  9 Cluster_1 Xp_GTX0501_2     41
    ## 10 Cluster_1 Xp_GTX0501_24    44
    ## # ℹ 49 more rows
    • Save SM-related genes as a list
    sm_genes<-funannot2 %>% filter(!is.na(antiSMASH)) %>% mutate(cluster = paste0(Contig,"_",antiSMASH)) %>% select(-c(gDNA,mRNA,CDS.transcript,Translation,CAZyme,COG,EggNog,GO.Terms,InterPro,PFAM,Protease,Secreted))
    • 318 of them are differentially expressed. Weirdly enough, 30% of thouse are upregulated in culture!
    sm_sig<-sig %>% filter(target_id %in% sm_genes$TranscriptID)
    sm_sig %>% group_by(change) %>% summarise(n=n())
    ## # A tibble: 3 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture      96
    ## 2 lichen      222
    ## 3 low_logFC   379
    • Heatmap of those
    png("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_SM_dge.png",res=300,width=2000,height=1000)
    plot_transcript_heatmap(so, transcripts=sm_sig$target_id[sm_sig$change!="low_logFC"],
                            cluster_transcripts=T,show_rownames=F,
                            clustering_callback = callback)
    dev.off()
    ## pdf 
    ##   4
    knitr::include_graphics("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_SM_dge.png") 

    • Break down by cluster
    library(splitstackshape)
    antismash<-read.delim2("../../02_mycobiont_genome/analysis_and_temp_files/06_annotate_lecanoro/GTX0501_pred/antismash/antismash_summary.txt")
    
    #get a list of all genes in each cluster
    antismash_gene_all<-antismash %>% mutate(start_num=as.numeric(gsub("^.*?_","",Start)),
                                             end_num=as.numeric(gsub("^.*?_","",End))) %>%
      mutate(gene_number=end_num-start_num+1) %>%
      expandRows("gene_number", drop = FALSE) %>%
      group_by(start_num,end_num) %>%
      mutate(gene_num = seq(start_num[1],end_num[1],by = 1)) %>%
      mutate(GeneID=ifelse(gene_num<1000,paste0("XANPAGTX0501_000",gene_num),
                                                ifelse(gene_num<10000,paste0("XANPAGTX0501_00",gene_num),
                                                paste0("XANPAGTX0501_0",gene_num)))) %>%
      mutate(Core_gene = ifelse(GeneID==Core_gene1|GeneID==Core_gene2,T,F)) %>% ungroup %>%
      select(GeneID,Cluster,Description,Core_gene)
    
    #add transcriptomic results
    summary_antismash_all<-antismash_gene_all %>% left_join(funannot2,relationship = "many-to-many") %>% 
      left_join(sig, by = c("TranscriptID"="target_id")) %>%
      group_by(Cluster,change) %>% summarize(n=n()) %>% 
      pivot_wider(names_from=change,values_from=n,values_fill=0) %>%
      left_join(antismash)
    ## Joining with `by = join_by(GeneID)`
    ## `summarise()` has grouped output by 'Cluster'. You can override using the
    ## `.groups` argument.
    ## Joining with `by = join_by(Cluster)`
    • Looking at just core biosynthetic genes, weirdly some of them are upregulated in culture
    summary_antismash_core<-antismash_gene_all %>% filter(Core_gene==T) %>%
      left_join(funannot2,relationship = "many-to-many") %>% 
      left_join(sig, by = c("TranscriptID"="target_id")) %>%
      mutate(change=ifelse(change %in% c("lichen","culture"),change,"Non-DGE")) %>%
      group_by(Cluster) %>% mutate(Core_gene_status=paste(change,collapse=",")) %>%
      select(Cluster,Core_gene_status) %>% distinct()
    ## Joining with `by = join_by(GeneID)`
    summary_antismash_core %>% group_by(Core_gene_status) %>%summarize(n=n())
    ## # A tibble: 6 × 2
    ##   Core_gene_status     n
    ##   <chr>            <int>
    ## 1 Non-DGE             35
    ## 2 Non-DGE,Non-DGE      2
    ## 3 culture              6
    ## 4 culture,culture      1
    ## 5 lichen              14
    ## 6 lichen,Non-DGE       1
    • In most clusters, the mojority is non DGE, as expected. Otherwise, the results are unexpectedly mixed.
      • 6 clusters are definitiely upregulated in lichen (core gene is upregulated in lichen, and no genes are upregulated in culture)
      • One is clearly upregulated in culture! Xp_GTX0501_8_Cluster_1 (unknown NRPS)
      • The rest is mixed
    summary_antismash_all<-summary_antismash_all %>% left_join(summary_antismash_core)
    ## Joining with `by = join_by(Cluster)`
    summary_antismash_all %>%
      kable(format = "html", col.names = colnames(summary_antismash_all)) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    Cluster culture lichen low_logFC NA Description Start End Core_gene1 Core_gene2 KnownClusterBlast_MIBIG KnownClusterBlast_description KnownClusterBlast_Similarity KnownClusterBlast_includes_core Core_gene_status
    Xp_GTX0501_10_Cluster_1 2 12 4 2 NRPS-like XANPAGTX0501_006291 XANPAGTX0501_006310 XANPAGTX0501_006297 NA Non-DGE
    Xp_GTX0501_10_Cluster_2 2 9 6 8 T1PKS XANPAGTX0501_006308 XANPAGTX0501_006332 XANPAGTX0501_006319 BGC0002215 HEx-pks23 polyketide<ca> 0.22 1 lichen
    Xp_GTX0501_10_Cluster_3 0 2 14 11 NRPS-like XANPAGTX0501_006344 XANPAGTX0501_006369 XANPAGTX0501_006358 NA Non-DGE
    Xp_GTX0501_10_Cluster_4 2 1 14 9 T3PKS XANPAGTX0501_006734 XANPAGTX0501_006757 XANPAGTX0501_006744 NA Non-DGE
    Xp_GTX0501_11_Cluster_1 3 2 2 7 NRPS XANPAGTX0501_006840 XANPAGTX0501_006853 XANPAGTX0501_006845 BGC0002235 ACE1 0.11 1 lichen
    Xp_GTX0501_12_Cluster_1 1 4 2 5 T1PKS XANPAGTX0501_007263 XANPAGTX0501_007274 XANPAGTX0501_007266 BGC0000146 solanapyrone D 0.33 1 Non-DGE
    Xp_GTX0501_12_Cluster_2 0 9 12 11 T1PKS XANPAGTX0501_007334 XANPAGTX0501_007363 XANPAGTX0501_007346 NA Non-DGE,Non-DGE
    Xp_GTX0501_12_Cluster_3 3 1 4 5 terpene XANPAGTX0501_007388 XANPAGTX0501_007400 XANPAGTX0501_007395 NA Non-DGE
    Xp_GTX0501_13_Cluster_1 1 5 2 14 NRPS-like XANPAGTX0501_007869 XANPAGTX0501_007889 XANPAGTX0501_007880 NA Non-DGE
    Xp_GTX0501_14_Cluster_1 1 2 4 3 terpene XANPAGTX0501_007984 XANPAGTX0501_007993 XANPAGTX0501_007988 NA Non-DGE
    Xp_GTX0501_15_Cluster_1 1 7 8 3 NRPS-like XANPAGTX0501_008267 XANPAGTX0501_008284 XANPAGTX0501_008272 NA lichen
    Xp_GTX0501_17_Cluster_1 0 7 8 7 T1PKS XANPAGTX0501_008845 XANPAGTX0501_008866 XANPAGTX0501_008852 BGC0000156 TAN-1612 biosynthetic gene cluster fromAspergillus niger 0.6 1 lichen
    Xp_GTX0501_1_Cluster_1 5 6 12 15 fungal-RiPP-like XANPAGTX0501_000667 XANPAGTX0501_000702 XANPAGTX0501_000685 NA lichen
    Xp_GTX0501_1_Cluster_2 1 7 4 14 T1PKS XANPAGTX0501_000912 XANPAGTX0501_000937 XANPAGTX0501_000926 BGC0000146 solanapyrone D 0.5 0 Non-DGE
    Xp_GTX0501_24_Cluster_1 10 10 7 17 fungal-RiPP-like XANPAGTX0501_009693 XANPAGTX0501_009736 XANPAGTX0501_009715 XANPAGTX0501_009716 NA culture,culture
    Xp_GTX0501_25_Cluster_1 0 2 4 11 terpene XANPAGTX0501_009858 XANPAGTX0501_009874 XANPAGTX0501_009866 NA Non-DGE
    Xp_GTX0501_27_Cluster_1 1 2 12 7 NRPS-like XANPAGTX0501_009956 XANPAGTX0501_009977 XANPAGTX0501_009964 NA Non-DGE
    Xp_GTX0501_2_Cluster_1 2 6 15 16 fungal-RiPP-like XANPAGTX0501_001046 XANPAGTX0501_001083 XANPAGTX0501_001064 NA lichen
    Xp_GTX0501_2_Cluster_2 1 3 10 14 NRPS XANPAGTX0501_001225 XANPAGTX0501_001250 XANPAGTX0501_001239 NA Non-DGE
    Xp_GTX0501_2_Cluster_3 1 1 8 14 NRPS-like XANPAGTX0501_001291 XANPAGTX0501_001314 XANPAGTX0501_001305 NA Non-DGE
    Xp_GTX0501_2_Cluster_4 0 0 7 6 terpene XANPAGTX0501_001616 XANPAGTX0501_001628 XANPAGTX0501_001622 NA Non-DGE
    Xp_GTX0501_2_Cluster_5 0 2 3 18 T1PKS XANPAGTX0501_001669 XANPAGTX0501_001691 XANPAGTX0501_001679 NA Non-DGE
    Xp_GTX0501_2_Cluster_6 0 0 5 19 NRP-metallophore XANPAGTX0501_001682 XANPAGTX0501_001705 XANPAGTX0501_001696 XANPAGTX0501_001694 NA Non-DGE,Non-DGE
    Xp_GTX0501_2_Cluster_7 0 0 5 16 NRPS XANPAGTX0501_001682 XANPAGTX0501_001702 XANPAGTX0501_001694 NA Non-DGE
    Xp_GTX0501_35_Cluster_1 6 3 3 9 NRPS-like XANPAGTX0501_010404 XANPAGTX0501_010423 XANPAGTX0501_010409 NA culture
    Xp_GTX0501_35_Cluster_2 2 8 4 9 T1PKS XANPAGTX0501_010418 XANPAGTX0501_010440 XANPAGTX0501_010430 NA lichen
    Xp_GTX0501_36_Cluster_1 2 4 9 11 T1PKS XANPAGTX0501_010449 XANPAGTX0501_010474 XANPAGTX0501_010464 BGC0001242 oxyjavanicin 0.25 1 Non-DGE
    Xp_GTX0501_3_Cluster_1 10 8 0 3 NRPS XANPAGTX0501_001961 XANPAGTX0501_001981 XANPAGTX0501_001972 BGC0001741 phyllostictine A/phyllostictine B 0.5 1 culture
    Xp_GTX0501_3_Cluster_2 10 6 0 4 T1PKS XANPAGTX0501_001963 XANPAGTX0501_001982 XANPAGTX0501_001973 BGC0001741 phyllostictine A/phyllostictine B 0.5 1 culture
    Xp_GTX0501_3_Cluster_3 0 7 1 6 terpene XANPAGTX0501_002180 XANPAGTX0501_002193 XANPAGTX0501_002188 NA Non-DGE
    Xp_GTX0501_41_Cluster_1 0 1 9 3 NRPS-like XANPAGTX0501_010609 XANPAGTX0501_010621 XANPAGTX0501_010621 NA lichen
    Xp_GTX0501_43_Cluster_1 0 4 4 16 T1PKS XANPAGTX0501_010622 XANPAGTX0501_010644 XANPAGTX0501_010632 NA Non-DGE
    Xp_GTX0501_48_Cluster_1 3 6 2 8 NRPS-like XANPAGTX0501_010686 XANPAGTX0501_010704 XANPAGTX0501_010692 NA culture
    Xp_GTX0501_4_Cluster_1 1 0 3 9 terpene XANPAGTX0501_002454 XANPAGTX0501_002466 XANPAGTX0501_002460 NA Non-DGE
    Xp_GTX0501_4_Cluster_2 0 6 15 12 NRPS-like XANPAGTX0501_002475 XANPAGTX0501_002501 XANPAGTX0501_002488 NA lichen
    Xp_GTX0501_4_Cluster_3 0 1 6 7 terpene XANPAGTX0501_002520 XANPAGTX0501_002531 XANPAGTX0501_002527 BGC0001839 squalestatin S1 0.4 1 Non-DGE
    Xp_GTX0501_4_Cluster_4 0 3 10 13 NRPS XANPAGTX0501_002525 XANPAGTX0501_002549 XANPAGTX0501_002535 BGC0002164 peramine 1 1 lichen
    Xp_GTX0501_4_Cluster_5 0 3 5 17 NRPS XANPAGTX0501_003108 XANPAGTX0501_003131 XANPAGTX0501_003119 NA lichen
    Xp_GTX0501_50_Cluster_1 0 2 1 3 T1PKS XANPAGTX0501_010705 XANPAGTX0501_010710 XANPAGTX0501_010709 BGC0000146 solanapyrone D 0.33 1 Non-DGE
    Xp_GTX0501_52_Cluster_1 2 3 1 2 NRPS XANPAGTX0501_010711 XANPAGTX0501_010717 XANPAGTX0501_010715 NA Non-DGE
    Xp_GTX0501_5_Cluster_1 4 5 2 9 T1PKS XANPAGTX0501_003170 XANPAGTX0501_003189 XANPAGTX0501_003176 BGC0002515 solanapyrone A 0.4 1 culture
    Xp_GTX0501_5_Cluster_2 7 5 7 7 T1PKS XANPAGTX0501_003179 XANPAGTX0501_003204 XANPAGTX0501_003194 NA Non-DGE
    Xp_GTX0501_5_Cluster_3 0 3 7 16 NRPS,T1PKS XANPAGTX0501_003287 XANPAGTX0501_003311 XANPAGTX0501_003300 BGC0001067 fumagillin/_-trans-bergamotene/fumagillol 0.2 0 Non-DGE
    Xp_GTX0501_5_Cluster_4 2 2 11 13 NRPS-like XANPAGTX0501_003361 XANPAGTX0501_003388 XANPAGTX0501_003374 NA lichen
    Xp_GTX0501_6_Cluster_1 0 3 10 10 T1PKS XANPAGTX0501_004370 XANPAGTX0501_004390 XANPAGTX0501_004380 NA Non-DGE
    Xp_GTX0501_6_Cluster_2 0 4 7 13 T1PKS XANPAGTX0501_004397 XANPAGTX0501_004420 XANPAGTX0501_004410 NA Non-DGE
    Xp_GTX0501_7_Cluster_1 0 4 10 12 T1PKS XANPAGTX0501_004611 XANPAGTX0501_004634 XANPAGTX0501_004622 BGC0001258 1,3,6,8-tetrahydroxynaphthalene 1 1 Non-DGE
    Xp_GTX0501_7_Cluster_2 0 2 7 3 terpene XANPAGTX0501_004765 XANPAGTX0501_004776 XANPAGTX0501_004770 NA lichen
    Xp_GTX0501_7_Cluster_3 1 9 9 13 T1PKS XANPAGTX0501_005055 XANPAGTX0501_005086 XANPAGTX0501_005070 NA Non-DGE
    Xp_GTX0501_7_Cluster_4 1 7 7 10 NRPS-like XANPAGTX0501_005071 XANPAGTX0501_005095 XANPAGTX0501_005087 NA Non-DGE
    Xp_GTX0501_7_Cluster_5 1 8 7 8 T1PKS XANPAGTX0501_005075 XANPAGTX0501_005098 XANPAGTX0501_005090 NA Non-DGE
    Xp_GTX0501_8_Cluster_1 9 0 4 11 NRPS XANPAGTX0501_005105 XANPAGTX0501_005128 XANPAGTX0501_005116 NA culture
    Xp_GTX0501_8_Cluster_2 1 6 9 13 T1PKS XANPAGTX0501_005240 XANPAGTX0501_005265 XANPAGTX0501_005252 NA lichen
    Xp_GTX0501_8_Cluster_3 1 5 10 13 T1PKS XANPAGTX0501_005280 XANPAGTX0501_005306 XANPAGTX0501_005291 BGC0000046 depudecin 0.33 0 Non-DGE
    Xp_GTX0501_8_Cluster_4 5 4 11 15 fungal-RiPP-like XANPAGTX0501_005329 XANPAGTX0501_005361 XANPAGTX0501_005349 NA Non-DGE
    Xp_GTX0501_8_Cluster_5 1 3 4 5 terpene XANPAGTX0501_005377 XANPAGTX0501_005388 XANPAGTX0501_005382 NA lichen,Non-DGE
    Xp_GTX0501_9_Cluster_1 0 4 3 14 T1PKS XANPAGTX0501_005732 XANPAGTX0501_005752 XANPAGTX0501_005742 BGC0001258 1,3,6,8-tetrahydroxynaphthalene 1 1 Non-DGE
    Xp_GTX0501_9_Cluster_2 2 0 5 16 NRPS XANPAGTX0501_005742 XANPAGTX0501_005764 XANPAGTX0501_005751 NA Non-DGE
    Xp_GTX0501_9_Cluster_3 2 3 12 11 T1PKS XANPAGTX0501_005922 XANPAGTX0501_005948 XANPAGTX0501_005935 BGC0002194 epipyrone A 0.75 1 Non-DGE
    • The list of definitiely upregulated in lichen
      • 2 of them have mathces to known clusters and are discussed below
    summary_antismash_all %>% filter(culture==0,lichen>0,grepl("lichen",Core_gene_status))  %>% 
      kable(format = "html", col.names = colnames(summary_antismash_all)) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    Cluster culture lichen low_logFC NA Description Start End Core_gene1 Core_gene2 KnownClusterBlast_MIBIG KnownClusterBlast_description KnownClusterBlast_Similarity KnownClusterBlast_includes_core Core_gene_status
    Xp_GTX0501_17_Cluster_1 0 7 8 7 T1PKS XANPAGTX0501_008845 XANPAGTX0501_008866 XANPAGTX0501_008852 BGC0000156 TAN-1612 biosynthetic gene cluster fromAspergillus niger 0.6 1 lichen
    Xp_GTX0501_41_Cluster_1 0 1 9 3 NRPS-like XANPAGTX0501_010609 XANPAGTX0501_010621 XANPAGTX0501_010621 NA lichen
    Xp_GTX0501_4_Cluster_2 0 6 15 12 NRPS-like XANPAGTX0501_002475 XANPAGTX0501_002501 XANPAGTX0501_002488 NA lichen
    Xp_GTX0501_4_Cluster_4 0 3 10 13 NRPS XANPAGTX0501_002525 XANPAGTX0501_002549 XANPAGTX0501_002535 BGC0002164 peramine 1 1 lichen
    Xp_GTX0501_4_Cluster_5 0 3 5 17 NRPS XANPAGTX0501_003108 XANPAGTX0501_003131 XANPAGTX0501_003119 NA lichen
    Xp_GTX0501_7_Cluster_2 0 2 7 3 terpene XANPAGTX0501_004765 XANPAGTX0501_004776 XANPAGTX0501_004770 NA lichen

    Parietin cluster

    • Cross-referencing with Theo Llewellyn’s paper. In his analysis of many lecanoromycete genomes, he identified putative anthraquinone BGC as those similar to 4 MIBiG clusters:
      • the anthraquinones emodin in Escovopsis weberi
      • asperthecin in Aspergillus nidulans FGSC A4
      • endocrocin/clavorubin in Claviceps purpurea
      • the structurally related alternariol in Aspergillus nidulans FGSC A4 and TAN-1612 Aspergillus niger
    • Our genome has one such cluster: Xp_GTX0501_17_Cluster_1 is a TPK1S cluster similar to TAN-1612
    • This cluster is one of the 6 definitely upregulated in lichen
    • This matches Theo’s results, since he also shown one such cluster in the X. parietina from JGI
    • Also the structure of the gene cluster matches what Theo describes as a conserved 4-gene structure unique to Teloschistales:
      • FUN_008850: a transporter with an ABC-casset (IPR003439 ABC transporter-like, ATP-binding domain); - strand
      • FUN_008851: metallo-beta-lactamase-type thioesterase (IPR001279 Metallo-beta-lactamase)
      • FUN_008852: PKS-synthase with SAT-KS-AT-PT-ACP domains (starter unit acyltransferase domain - ketoacyl synthase - acyltransferase - product template - acyl carrier protein)
      • FUN_008853: a dehydratase with a single EthylD domain (IPR009799 EthD domain)
    • None of other PKS clusters had this structure
    • All four of the genes are upregulated in lichen
    sig %>% filter(target_id %in% c("XANPAGTX0501_008850-T1","XANPAGTX0501_008851-T1","XANPAGTX0501_008852-T1","XANPAGTX0501_008853-T1")) %>% select(target_id,qval,b,change)
    ## # A tibble: 4 × 4
    ##   target_id                  qval     b change
    ##   <chr>                     <dbl> <dbl> <chr> 
    ## 1 XANPAGTX0501_008850-T1 6.06e-22 -1.70 lichen
    ## 2 XANPAGTX0501_008851-T1 5.87e-21 -1.93 lichen
    ## 3 XANPAGTX0501_008852-T1 1.87e-22 -2.08 lichen
    ## 4 XANPAGTX0501_008853-T1 3.26e-69 -4.00 lichen
    • Boxplots show clearly that it is upregulated in lichen
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_008850-T1", 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_008851-T1", 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_008852-T1", 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_008853-T1", 
                   units = "est_counts", 
                   color_by = "condition")

    Peramine-like cluster

    • Main biosynthetic gene from Xp_GTX0501_4_Cluster_4 (XANPAGTX0501_002535) is 65% identical to peramine synthase (evalue=0.0). However, it’s much shorter than the reference (1425 aa vs 2773 aa in the reference)
    • This gene is from a lichen-enriched orthogroup
    • Peramine is an anti-insect alcaloid from Epichloe, which is a mutualistic endophyte. Thought to be offer the host plant protection from herbivory
    • Some lichen compaunds are apparently alkaloids, including rhizocarpic acid, but none of these are known from Xanthoria. There is one report showing trace amounts of alkaloids in Xanthoria
    • This paper describes similar cluster from Cladonia grayi
    • They also report similar cluster from Xanthoria parietina, but deem it non-functional or producing something other than peramine. The reason for that is two missing domains: A1 (first of the adenylylation domains; peramine synthase from Cladonia has two of them, while Xanthoria only one) and R (reductase domain). In total, their annotation has 3 domains:
      • C (condensation domain)
      • A (adenylylation domain)
      • M (methylation domain)
    • According to antiSMASH, our putative peramine synthase has four domains. First three are the same as in the paper, but the last (CP or PCP) is an addition
      • C (condensation domain)
      • A (adenylylation domain)
      • nMT (nitrogen methyltransferase)
      • CP (peptidyl-carrier protein domain)
    • I ran antiSMASH on Cladonia grayi to compare. There, putative peramine synthase has 6 domains: A, CP, C, nMT, CP, NAD. The length of the Cladonia protein is close to that of the reference
    • Conclusion: this cluster is likely functional, but producing something other than peramine

    4.6. Protein Kinases

    • In total, the genome has 144 proteins with a protein kinase domain. Of them, 24 are from lichen-enriched orthogroups
    funannot2 %>% filter(grepl("IPR000719",InterPro)) %>% group_by(lichen_ortho) %>% summarize(n=n())
    ## # A tibble: 2 × 2
    ##   lichen_ortho     n
    ##   <lgl>        <int>
    ## 1 FALSE          115
    ## 2 TRUE            19
    • Lichen and culture have similar number of kinases upregulated
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("IPR000719",InterPro)) %>% group_by(change) %>% summarize(n=n())
    ## # A tibble: 4 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture      11
    ## 2 lichen       10
    ## 3 low_logFC    53
    ## 4 <NA>         60
    • How many of them are serine/threonine kinases? In culture 9 (out of 11) upregulated kinases are serine/threonine kinases, in lichens it’s only 4
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("IPR008271",InterPro)) %>% group_by(change) %>% summarize(n=n())
    ## # A tibble: 4 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture       9
    ## 2 lichen        4
    ## 3 low_logFC    42
    ## 4 <NA>         47
    • List them. The list includes one lichen-upregulated “MAP kinase-activated protein kinase”
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("IPR008271",InterPro),change %in% c("culture","lichen")) %>%
      select(TranscriptID,b,change,Product,InterPro) %>%
      kable(format = "html", col.names = c("TranscriptID","b","change","Product","InterPro")) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    TranscriptID b change Product InterPro
    XANPAGTX0501_000131-T1 3.173122 culture hypothetical protein IPR000719 Protein kinase domain;IPR008271 Serine/threonine-protein kinase, active site;IPR011009 Protein kinase-like domain superfamily;IPR011990 Tetratricopeptide-like helical domain superfamily;IPR017441 Protein kinase, ATP binding site
    XANPAGTX0501_002130-T1 1.107301 culture hypothetical protein IPR000719 Protein kinase domain;IPR001245 Serine-threonine/tyrosine-protein kinase, catalytic domain;IPR002110 Ankyrin repeat;IPR006597 Sel1-like repeat;IPR008271 Serine/threonine-protein kinase, active site;IPR011009 Protein kinase-like domain superfamily;IPR011990 Tetratricopeptide-like helical domain superfamily;IPR020683 Domain of unknown function DUF3447;IPR036770 Ankyrin repeat-containing domain superfamily
    XANPAGTX0501_002999-T3 1.110073 culture Serine/threonine-protein kinase, variant 3 IPR000008 C2 domain;IPR000719 Protein kinase domain;IPR000961 AGC-kinase, C-terminal;IPR008271 Serine/threonine-protein kinase, active site;IPR011009 Protein kinase-like domain superfamily;IPR017441 Protein kinase, ATP binding site;IPR017892 Protein kinase, C-terminal;IPR035892 C2 domain superfamily
    XANPAGTX0501_003063-T3 1.307124 culture Cytochrome c oxidase subunit 1, variant 3 IPR000719 Protein kinase domain;IPR008271 Serine/threonine-protein kinase, active site;IPR011009 Protein kinase-like domain superfamily;IPR017441 Protein kinase, ATP binding site
    XANPAGTX0501_003150-T1 1.034833 culture hypothetical protein IPR000719 Protein kinase domain;IPR008271 Serine/threonine-protein kinase, active site;IPR011009 Protein kinase-like domain superfamily
    XANPAGTX0501_003466-T1 -1.376968 lichen hypothetical protein IPR000719 Protein kinase domain;IPR008271 Serine/threonine-protein kinase, active site;IPR011009 Protein kinase-like domain superfamily
    XANPAGTX0501_003979-T1 1.608051 culture Serine/threonine protein kinase IPR000719 Protein kinase domain;IPR008271 Serine/threonine-protein kinase, active site;IPR011009 Protein kinase-like domain superfamily;IPR017441 Protein kinase, ATP binding site
    XANPAGTX0501_004521-T1 -1.452240 lichen hypothetical protein IPR008271 Serine/threonine-protein kinase, active site;IPR011009 Protein kinase-like domain superfamily
    XANPAGTX0501_004525-T1 -1.003000 lichen hypothetical protein IPR000719 Protein kinase domain;IPR008271 Serine/threonine-protein kinase, active site;IPR011009 Protein kinase-like domain superfamily
    XANPAGTX0501_005890-T1 1.279397 culture hypothetical protein IPR000719 Protein kinase domain;IPR002110 Ankyrin repeat;IPR008271 Serine/threonine-protein kinase, active site;IPR011009 Protein kinase-like domain superfamily;IPR020683 Domain of unknown function DUF3447;IPR036770 Ankyrin repeat-containing domain superfamily
    XANPAGTX0501_006750-T1 1.049144 culture hypothetical protein IPR000719 Protein kinase domain;IPR008271 Serine/threonine-protein kinase, active site;IPR011009 Protein kinase-like domain superfamily
    XANPAGTX0501_007777-T2 1.360843 culture eukaryotic translation initiation factor 2-alpha kinase IPR000719 Protein kinase domain;IPR008271 Serine/threonine-protein kinase, active site;IPR011009 Protein kinase-like domain superfamily;IPR016255 eIF-2-alpha kinase Gcn2;IPR017441 Protein kinase, ATP binding site;IPR024435 Histidyl tRNA synthetase-related domain;IPR036621 Anticodon-binding domain superfamily;IPR041715 Class II Histidinyl-tRNA synthetase (HisRS)-like catalytic core domain
    XANPAGTX0501_010496-T1 -3.360677 lichen MAP kinase-activated protein kinase 2 IPR000253 Forkhead-associated (FHA) domain;IPR000719 Protein kinase domain;IPR008271 Serine/threonine-protein kinase, active site;IPR008984 SMAD/FHA domain superfamily;IPR011009 Protein kinase-like domain superfamily
    • All other MAPKs that were found in the genome aren’t DGE
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>% 
      filter(grepl("IPR017240",InterPro)|grepl("IPR003527",InterPro) | 
              grepl( "IPR008352",InterPro) | grepl( "IPR038783",InterPro)) %>%
      select(TranscriptID,b,change,Product,InterPro) %>%
    kable(format = "html", col.names = c("TranscriptID","b","change","product","InterPro")) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    TranscriptID b change product InterPro
    XANPAGTX0501_003276-T1 -0.2487408 low_logFC mitogen-activated serine/threonine-protein kinase IPR000719 Protein kinase domain;IPR003527 Mitogen-activated protein (MAP) kinase, conserved site;IPR008271 Serine/threonine-protein kinase, active site;IPR011009 Protein kinase-like domain superfamily;IPR017441 Protein kinase, ATP binding site
    XANPAGTX0501_003462-T1 NA NA mitogen activated protein kinase IPR000719 Protein kinase domain;IPR003527 Mitogen-activated protein (MAP) kinase, conserved site;IPR008271 Serine/threonine-protein kinase, active site;IPR011009 Protein kinase-like domain superfamily;IPR017441 Protein kinase, ATP binding site
    XANPAGTX0501_009386-T1 NA NA Suppressor of Sensor Kinase (SLN1) IPR000719 Protein kinase domain;IPR008271 Serine/threonine-protein kinase, active site;IPR011009 Protein kinase-like domain superfamily;IPR017240 Mitogen-activated protein (MAP) kinase kinase kinase Ssk2/Ssk22;IPR017441 Protein kinase, ATP binding site
    XANPAGTX0501_010261-T1 0.5642185 low_logFC MAPK protein hog1 IPR000719 Protein kinase domain;IPR003527 Mitogen-activated protein (MAP) kinase, conserved site;IPR008271 Serine/threonine-protein kinase, active site;IPR008352 Mitogen-activated protein (MAP) kinase p38-like;IPR011009 Protein kinase-like domain superfamily;IPR017441 Protein kinase, ATP binding site;IPR038783 Fungal mitogen-activated protein (MAP) kinase Sty1/Hog1
    XANPAGTX0501_010261-T2 -0.4922589 low_logFC MAPK protein hog1, variant 2 IPR000719 Protein kinase domain;IPR003527 Mitogen-activated protein (MAP) kinase, conserved site;IPR008271 Serine/threonine-protein kinase, active site;IPR008352 Mitogen-activated protein (MAP) kinase p38-like;IPR011009 Protein kinase-like domain superfamily;IPR017441 Protein kinase, ATP binding site;IPR038783 Fungal mitogen-activated protein (MAP) kinase Sty1/Hog1
    • The remaining kinases in culture are both undefined any further
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>% 
      filter(grepl("IPR000719",InterPro),change =="culture") %>%
      filter(!grepl("IPR008271",InterPro)) %>% 
      select(TranscriptID,b,change,Product,InterPro) %>%
    kable(format = "html", col.names = c("TranscriptID","b","change","product","InterPro")) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    TranscriptID b change product InterPro
    XANPAGTX0501_000132-T1 3.167267 culture hypothetical protein IPR000719 Protein kinase domain;IPR011009 Protein kinase-like domain superfamily
    XANPAGTX0501_005889-T1 3.211218 culture hypothetical protein IPR000719 Protein kinase domain;IPR011009 Protein kinase-like domain superfamily
    • In lichens, those are:
      • two of them are not defined further
      • one AGC-kinase, one Tyr kinase
      • one with HeLo domain, one with Ankyrin and TPR repeats, one with von Willebrand factor A-like domain (from blood proteins)
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>% 
      filter(grepl("IPR000719",InterPro),change =="lichen") %>%
      filter(!grepl("IPR008271",InterPro)) %>% 
      select(TranscriptID,b,change,Product,InterPro) %>%
    kable(format = "html", col.names = c("TranscriptID","b","change","product","InterPro")) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    TranscriptID b change product InterPro
    XANPAGTX0501_001006-T1 -3.826268 lichen hypothetical protein IPR000719 Protein kinase domain;IPR011009 Protein kinase-like domain superfamily;IPR029498 Prion-inhibition and propagation, HeLo domain;IPR038305 HeLo domain superfamily
    XANPAGTX0501_001007-T1 -3.548226 lichen hypothetical protein IPR000719 Protein kinase domain;IPR002110 Ankyrin repeat;IPR011009 Protein kinase-like domain superfamily;IPR011990 Tetratricopeptide-like helical domain superfamily;IPR013026 None;IPR019734 Tetratricopeptide repeat;IPR020683 Domain of unknown function DUF3447;IPR036770 Ankyrin repeat-containing domain superfamily
    XANPAGTX0501_004853-T1 -1.196511 lichen hypothetical protein IPR000719 Protein kinase domain;IPR000961 AGC-kinase, C-terminal;IPR011009 Protein kinase-like domain superfamily
    XANPAGTX0501_009558-T1 -1.573738 lichen hypothetical protein IPR000719 Protein kinase domain;IPR011009 Protein kinase-like domain superfamily;IPR036465 von Willebrand factor A-like domain superfamily
    XANPAGTX0501_010435-T1 -1.088118 lichen hypothetical protein IPR000719 Protein kinase domain;IPR011009 Protein kinase-like domain superfamily
    XANPAGTX0501_010556-T1 -2.886129 lichen hypothetical protein IPR000719 Protein kinase domain;IPR011009 Protein kinase-like domain superfamily
    XANPAGTX0501_010558-T1 -3.182102 lichen hypothetical protein IPR000719 Protein kinase domain;IPR008266 Tyrosine-protein kinase, active site;IPR011009 Protein kinase-like domain superfamily

    4.7. Transporters

    Sugar and polyol transporters

    • In total, the genome has 38 proteins from the Major facilitator, sugar transporter-like family. This is in line with what Philipp identified in other lecanoromycetes. Of them, 5 are from lichen-enriched orthogroups
    funannot2 %>% filter(grepl("IPR005828",InterPro_new)) %>% group_by(lichen_ortho) %>% summarize(n=n())
    ## # A tibble: 2 × 2
    ##   lichen_ortho     n
    ##   <lgl>        <int>
    ## 1 FALSE           33
    ## 2 TRUE             5
    • List all lichen-specific
    funannot2 %>% filter(grepl("IPR005828",InterPro_new),lichen_ortho==T) %>% select(TranscriptID,InterPro_new)
    ##             TranscriptID
    ## 1 XANPAGTX0501_001653-T1
    ## 2 XANPAGTX0501_005571-T1
    ## 3 XANPAGTX0501_005572-T1
    ## 4 XANPAGTX0501_008617-T1
    ## 5 XANPAGTX0501_010000-T1
    ##                                                                                                                                                                                                                    InterPro_new
    ## 1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily
    ## 2                                              IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily
    ## 3                                                                                    IPR005828 Major facilitator, sugar transporter-like, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily
    ## 4 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily
    ## 5                                              IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily
    • Six of them upregulated in lichen, 2 in culture
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("IPR005828",InterPro_new)) %>% group_by(change) %>% summarize(n=n())
    ## # A tibble: 4 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture       2
    ## 2 lichen        6
    ## 3 low_logFC    16
    ## 4 <NA>         14
    • Of the 5 lichen-enriched, 3 were upregulated in lichens
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("IPR005828",InterPro_new),lichen_ortho==T) %>% group_by(change) %>% summarize(n=n())
    ## # A tibble: 2 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 lichen        3
    ## 2 low_logFC     2
    • Remaining 2 were more present in lichen, but had b-valie between -1 and 0
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("IPR005828",InterPro_new),lichen_ortho==T) %>% select(TranscriptID,change,b,qval)
    ##             TranscriptID    change          b         qval
    ## 1 XANPAGTX0501_001653-T1    lichen -2.0408199 3.742386e-12
    ## 2 XANPAGTX0501_005571-T1    lichen -1.9797831 6.700776e-05
    ## 3 XANPAGTX0501_005572-T1    lichen -1.9025473 1.986579e-05
    ## 4 XANPAGTX0501_008617-T1 low_logFC -0.5728169 3.266494e-07
    ## 5 XANPAGTX0501_010000-T1 low_logFC -0.6135197 2.741270e-04
    • Identitified putative polyol transporter with blast.
      • Used 4 proteins as queries, following Philipp: “AAX98668.1; from Ambrosiozyma monospora, CAR65543.1, CAG86001.1; from Debaryomyces hansenii, NP_010036.1; from Saccharomyces cerevisiae”, saved as analysis_and_temp_files/08_dge_culture_lichen/blastp_polyol_transporters.txt
    sbatch --mem=5G -c 1 --wrap="source package d6092385-3a81-49d9-b044-8ffb85d0c446; blastp -query analysis_and_temp_files/08_dge_culture_lichen/polyol_transporters_genbank.fa -subject ../02_mycobiont_genome/analysis_and_temp_files/06_annotate_lecanoro/GTX0501_pred/annotate_results/Xanthoria_parietina_GTX0501.proteins.fa -outfmt 6 -out analysis_and_temp_files/08_dge_culture_lichen/blastp_polyol_transporters.txt -evalue 1e-100"
    • Returned 9 proteins (from 8 genes) with matches with e-value<1e-100
    polyol_blast<-read.delim2("../analysis_and_temp_files/08_dge_culture_lichen/blastp_polyol_transporters.txt",header=F)
    polyol_blast<-polyol_blast %>% select(V1,V2,V11,V12)
    colnames(polyol_blast)<-c("query","subject","e-value","score")
    polyol_blast
    ##         query                subject   e-value score
    ## 1  AAX98668.1 XANPAGTX0501_010000-T1 1.49e-179   516
    ## 2  AAX98668.1 XANPAGTX0501_001653-T1 5.32e-176   508
    ## 3  AAX98668.1 XANPAGTX0501_008617-T1 2.52e-114   350
    ## 4  CAR65543.1 XANPAGTX0501_005996-T1       0.0   602
    ## 5  CAR65543.1 XANPAGTX0501_007564-T1 9.63e-178   519
    ## 6  CAR65543.1 XANPAGTX0501_009339-T1 5.39e-151   449
    ## 7  CAR65543.1 XANPAGTX0501_009339-T2 2.57e-149   442
    ## 8  CAR65543.1 XANPAGTX0501_000906-T1 4.21e-145   434
    ## 9  CAR65543.1 XANPAGTX0501_001701-T1 7.68e-133   402
    ## 10 CAG86001.1 XANPAGTX0501_005996-T1       0.0   591
    ## 11 CAG86001.1 XANPAGTX0501_007564-T1 2.80e-178   520
    ## 12 CAG86001.1 XANPAGTX0501_009339-T1 1.51e-152   453
    ## 13 CAG86001.1 XANPAGTX0501_009339-T2 2.52e-150   445
    ## 14 CAG86001.1 XANPAGTX0501_000906-T1 3.81e-146   436
    ## 15 CAG86001.1 XANPAGTX0501_001701-T1 1.48e-132   401
    • Of them 3 were from lichen-enriched orthogroups and 1 was DGE and upregulated in lichen (and also from a lichen-enriched orthogroup)
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(TranscriptID %in% polyol_blast$subject) %>% select(TranscriptID,change,b,qval,lichen_ortho)
    ##             TranscriptID    change          b         qval lichen_ortho
    ## 1 XANPAGTX0501_000906-T1      <NA>         NA           NA        FALSE
    ## 2 XANPAGTX0501_001653-T1    lichen -2.0408199 3.742386e-12         TRUE
    ## 3 XANPAGTX0501_001701-T1 low_logFC  0.6237851 2.869476e-06        FALSE
    ## 4 XANPAGTX0501_005996-T1 low_logFC  0.4003563 2.088997e-06        FALSE
    ## 5 XANPAGTX0501_007564-T1 low_logFC  0.2966283 4.581743e-02        FALSE
    ## 6 XANPAGTX0501_008617-T1 low_logFC -0.5728169 3.266494e-07         TRUE
    ## 7 XANPAGTX0501_009339-T1      <NA>         NA           NA        FALSE
    ## 8 XANPAGTX0501_009339-T2      <NA>         NA           NA        FALSE
    ## 9 XANPAGTX0501_010000-T1 low_logFC -0.6135197 2.741270e-04         TRUE
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_001653-T1", 
                   units = "est_counts", 
                   color_by = "condition")

    Ammonium transporters

    • In total, the genome has 2 proteins from the Ammonium transporter, neither is from lichen-enriched orthogroups
    funannot2 %>% filter(grepl("IPR001905",InterPro_new)) %>% group_by(lichen_ortho) %>% summarize(n=n())
    ## # A tibble: 1 × 2
    ##   lichen_ortho     n
    ##   <lgl>        <int>
    ## 1 FALSE            2
    • One of them is upregulated in lichen
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("IPR001905",InterPro_new)) %>% select(TranscriptID,change,b,qval)
    ##             TranscriptID change         b         qval
    ## 1 XANPAGTX0501_004972-T1 lichen -2.248389 3.681322e-56
    ## 2 XANPAGTX0501_009385-T1   <NA>        NA           NA

    All transporters

    • 41 upregulated in lichen, 12 of them (29%) from lichen-enriched orthogroups
    transport_lic<-funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("transport",InterPro_new),change=="lichen") %>% select(TranscriptID,InterPro_new,lichen_ortho)
    transport_lic %>% 
      kable(format = "html", col.names = c("TranscriptID","InterPro_new","lichen_ortho")) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    TranscriptID InterPro_new lichen_ortho
    XANPAGTX0501_000142-T1 IPR011701 Major facilitator superfamily, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_000342-T1 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_000342-T2 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_000684-T1 IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_000704-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_001082-T1 IPR004853 Sugar phosphate transporter domain FALSE
    XANPAGTX0501_001448-T1 IPR011701 Major facilitator superfamily, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_001653-T1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_002486-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_003065-T3 IPR005178 Organic solute transporter subunit alpha/Transmembrane protein 184 FALSE
    XANPAGTX0501_003337-T2 IPR004738 Phosphate permease, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_004383-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_004428-T1 IPR004695 Transporter protein SLAC1/Mae1/ Ssu1/TehA, IPR030185 Malic acid transport protein, IPR038665 Voltage-dependent anion channel superfamily FALSE
    XANPAGTX0501_004624-T1 IPR004737 Nitrate transporter NarK/NarU-like, IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_004972-T1 IPR001905 Ammonium transporter, IPR018047 Ammonium transporter, conserved site, IPR024041 Ammonium transporter AmtB-like domain, IPR029020 Ammonium/urea transporter FALSE
    XANPAGTX0501_004996-T1 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_005027-T1 IPR003370 Chromate transporter, IPR014047 Chromate transporter, long chain FALSE
    XANPAGTX0501_005378-T1 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_005489-T1 IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_005571-T1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_005572-T1 IPR005828 Major facilitator, sugar transporter-like, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_005708-T1 IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_006293-T1 IPR001958 Tetracycline resistance protein TetA/multidrug resistance protein MdtG, IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_006324-T1 IPR001204 Phosphate transporter FALSE
    XANPAGTX0501_006367-T1 IPR005829 Sugar transporter, conserved site, IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_006527-T1 IPR000791 Acetate transporter GPR1/FUN34/SatP family FALSE
    XANPAGTX0501_007882-T1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_007969-T1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_008850-T1 IPR003439 ABC transporter-like, ATP-binding domain, IPR003593 AAA+ ATPase domain, IPR010929 CDR ABC transporter, IPR013525 ABC-2 type transporter, IPR017871 ABC transporter-like, conserved site, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR029481 ABC-transporter, N-terminal domain, IPR034001 ATP-binding cassette transporter, PDR-like subfamily G, domain 1, IPR034003 ATP-binding cassette transporter, PDR-like subfamily G, domain 2 FALSE
    XANPAGTX0501_009192-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_009193-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_009202-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_009320-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_009472-T1 IPR005018 DOMON domain, IPR006593 Cytochrome b561/ferric reductase transmembrane, IPR015920 Cellobiose dehydrogenase, cytochrome domain, IPR018825 Domain of unknown function DUF2427, IPR036259 MFS transporter superfamily, IPR038697 None FALSE
    XANPAGTX0501_009914-T1 IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_010175-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_010196-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_010381-T1 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_010444-T1 IPR001958 Tetracycline resistance protein TetA/multidrug resistance protein MdtG, IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_010683-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_010706-T1 IPR003439 ABC transporter-like, ATP-binding domain, IPR003593 AAA+ ATPase domain, IPR011527 ABC transporter type 1, transmembrane domain, IPR017871 ABC transporter-like, conserved site, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR036640 ABC transporter type 1, transmembrane domain superfamily FALSE
    • 35 upregulated in lichen, 3 of them (8.6%) from lichen-enriched orthogroups
    transport_cul<-funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("transport",InterPro_new),change=="culture") %>% select(TranscriptID,InterPro_new,lichen_ortho)
    transport_lic %>% 
      kable(format = "html", col.names = c("TranscriptID","InterPro_new","lichen_ortho")) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    TranscriptID InterPro_new lichen_ortho
    XANPAGTX0501_000142-T1 IPR011701 Major facilitator superfamily, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_000342-T1 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_000342-T2 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_000684-T1 IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_000704-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_001082-T1 IPR004853 Sugar phosphate transporter domain FALSE
    XANPAGTX0501_001448-T1 IPR011701 Major facilitator superfamily, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_001653-T1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_002486-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_003065-T3 IPR005178 Organic solute transporter subunit alpha/Transmembrane protein 184 FALSE
    XANPAGTX0501_003337-T2 IPR004738 Phosphate permease, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_004383-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_004428-T1 IPR004695 Transporter protein SLAC1/Mae1/ Ssu1/TehA, IPR030185 Malic acid transport protein, IPR038665 Voltage-dependent anion channel superfamily FALSE
    XANPAGTX0501_004624-T1 IPR004737 Nitrate transporter NarK/NarU-like, IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_004972-T1 IPR001905 Ammonium transporter, IPR018047 Ammonium transporter, conserved site, IPR024041 Ammonium transporter AmtB-like domain, IPR029020 Ammonium/urea transporter FALSE
    XANPAGTX0501_004996-T1 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_005027-T1 IPR003370 Chromate transporter, IPR014047 Chromate transporter, long chain FALSE
    XANPAGTX0501_005378-T1 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_005489-T1 IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_005571-T1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_005572-T1 IPR005828 Major facilitator, sugar transporter-like, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_005708-T1 IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_006293-T1 IPR001958 Tetracycline resistance protein TetA/multidrug resistance protein MdtG, IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_006324-T1 IPR001204 Phosphate transporter FALSE
    XANPAGTX0501_006367-T1 IPR005829 Sugar transporter, conserved site, IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_006527-T1 IPR000791 Acetate transporter GPR1/FUN34/SatP family FALSE
    XANPAGTX0501_007882-T1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_007969-T1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_008850-T1 IPR003439 ABC transporter-like, ATP-binding domain, IPR003593 AAA+ ATPase domain, IPR010929 CDR ABC transporter, IPR013525 ABC-2 type transporter, IPR017871 ABC transporter-like, conserved site, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR029481 ABC-transporter, N-terminal domain, IPR034001 ATP-binding cassette transporter, PDR-like subfamily G, domain 1, IPR034003 ATP-binding cassette transporter, PDR-like subfamily G, domain 2 FALSE
    XANPAGTX0501_009192-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_009193-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_009202-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_009320-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_009472-T1 IPR005018 DOMON domain, IPR006593 Cytochrome b561/ferric reductase transmembrane, IPR015920 Cellobiose dehydrogenase, cytochrome domain, IPR018825 Domain of unknown function DUF2427, IPR036259 MFS transporter superfamily, IPR038697 None FALSE
    XANPAGTX0501_009914-T1 IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_010175-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_010196-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_010381-T1 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_010444-T1 IPR001958 Tetracycline resistance protein TetA/multidrug resistance protein MdtG, IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_010683-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_010706-T1 IPR003439 ABC transporter-like, ATP-binding domain, IPR003593 AAA+ ATPase domain, IPR011527 ABC transporter type 1, transmembrane domain, IPR017871 ABC transporter-like, conserved site, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR036640 ABC transporter type 1, transmembrane domain superfamily FALSE

    4.8. All secreted proteins

    • Visualized all 608 putative secreted proteins
    secreted<-read.delim2("../../04_secretome/analysis_and_temp_files/01_predicting_effectors/all_secreted_list.txt",header=F)
    secreted$TranscriptID<-str_replace(secreted$V1,"FUN","XANPAGTX0501")
    
    png("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_secreted_all.png",res=300,width=2000,height=1500)
    plot_transcript_heatmap(so, transcripts=secreted$TranscriptID,
                            cluster_transcripts=T,show_rownames=F,
                            clustering_callback = callback)
    dev.off()
    ## pdf 
    ##   4
    knitr::include_graphics("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_secreted_all.png") 

    • Of them, 154 (25.3%) are upregulated in lichen and 40 in culture (6.6%)
    secreted_sig<-sig %>% filter(target_id %in% secreted$TranscriptID)
    secreted_sig %>% group_by(change) %>% summarise(n=n())
    ## # A tibble: 3 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture      40
    ## 2 lichen      154
    ## 3 low_logFC   192
    • In lichen, secreted proteins make 0.1299578 of upregulated genes
      • compared to 0.0543585 across the whole genome
      • the number for upregulated in culture is 0.070922
    • Visualize only DGE secreted proteins
    secreted<-read.delim2("../../04_secretome/analysis_and_temp_files/01_predicting_effectors/all_secreted_list.txt",header=F)
    secreted$TranscriptID<-str_replace(secreted$V1,"FUN","XANPAGTX0501")
    
    png("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_secreted_dge.png",res=300,width=2000,height=1500)
    plot_transcript_heatmap(so, transcripts=secreted_sig$target_id[secreted_sig$change!="low_logFC"],
                            cluster_transcripts=T,show_rownames=F,
                            clustering_callback = callback)
    dev.off()
    ## pdf 
    ##   4
    knitr::include_graphics("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_secreted_dge.png") 

    • All secreted proteins upregulated in lichen. There are a bunch of lectins there, GHs and other hydrolases, and a killer toxin
      • Most strongly upregulated are a GH128 (beta-1,3-glucanase / laminarinase), lectins, and a toxin
    secreted_table_lichen<-secreted_sig %>% 
      filter(change=="lichen") %>% left_join(funannot2,by = c("target_id"="TranscriptID")) %>% select(target_id,b,Product,CAZyme_new,COG_new,EggNog_new,GO.Terms_new,InterPro_new,PFAM_new,Protease_new,KO,lichen_ortho) %>% 
      arrange(b)
    secreted_table_lichen %>%
      kable(format = "html", col.names = colnames(secreted_table_lichen)) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    target_id b Product CAZyme_new COG_new EggNog_new GO.Terms_new InterPro_new PFAM_new Protease_new KO lichen_ortho
    XANPAGTX0501_001127-T1 -6.214566 hypothetical protein GH128 NA NA NA IPR008979 Galactose-binding-like domain superfamily, IPR017853 Glycoside hydrolase superfamily, IPR024655 Asl1-like, glycosyl hydrolase catalytic domain PF11790 NA FALSE
    XANPAGTX0501_000655-T1 -6.186697 hypothetical protein NA NA NA NA IPR035992 Ricin B-like lectins NA NA FALSE
    XANPAGTX0501_009887-T1 -6.152645 hypothetical protein NA NA ENOG503P63W NA NA NA NA FALSE
    XANPAGTX0501_007865-T1 -5.732812 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_005533-T1 -5.599048 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_010170-T1 -5.061039 hypothetical protein NA NA NA NA IPR000772 Ricin B, lectin domain, IPR035992 Ricin B-like lectins NA NA TRUE
    XANPAGTX0501_001063-T1 -4.589396 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_009135-T1 -4.527295 hypothetical protein NA NA NA GO_component: GO:0005576 - extracellular region [Evidence IEA], GO_process: GO:0009405 - pathogenesis [Evidence IEA] IPR011329 Killer toxin Kp4/SMK, IPR015131 Killer toxin, Kp4 PF09044 NA FALSE
    XANPAGTX0501_005364-T1 -4.400832 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_000656-T1 -4.357565 hypothetical protein NA NA NA NA IPR000772 Ricin B, lectin domain, IPR035992 Ricin B-like lectins NA NA TRUE
    XANPAGTX0501_002409-T1 -4.088717 hypothetical protein NA NA NA GO_component: GO:0009277 - fungal-type cell wall [Evidence IEA], GO_function: GO:0005199 - structural constituent of cell wall [Evidence IEA] IPR001338 Hydrophobin NA NA TRUE
    XANPAGTX0501_009189-T1 -4.014594 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_001935-T1 -3.959675 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_006245-T1 -3.929116 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_005061-T1 -3.850853 hypothetical protein NA S:(S) Function unknown ENOG503NVBA NA NA PF13668 NA FALSE
    XANPAGTX0501_001856-T1 -3.823871 hypothetical protein NA NA ENOG503P50I NA IPR037176 Osmotin/thaumatin-like superfamily NA NA TRUE
    XANPAGTX0501_006282-T1 -3.776836 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_004309-T1 -3.720276 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_004881-T1 -3.693406 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_001015-T1 -3.506517 hypothetical protein NA S:(S) Function unknown ENOG503P58T, ENOG503P74E, ENOG503P76M GO_process: GO:0042742 - defense response to bacterium [Evidence IEA], GO_process: GO:0050832 - defense response to fungus [Evidence IEA] IPR001153 Barwin domain, IPR009009 RlpA-like protein, double-psi beta-barrel domain, IPR036908 RlpA-like domain superfamily PF00967, PF03330 NA FALSE
    XANPAGTX0501_010044-T1 -3.501860 hypothetical protein NA Q:(Q) Secondary metabolites biosynthesis, transport and catabolism ENOG503NTXA GO_function: GO:0016491 - oxidoreductase activity [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR002937 Amine oxidase, IPR036188 FAD/NAD(P)-binding domain superfamily PF13450 NA FALSE
    XANPAGTX0501_009317-T1 -3.484947 hypothetical protein NA NA NA NA IPR008979 Galactose-binding-like domain superfamily NA NA FALSE
    XANPAGTX0501_009340-T1 -3.459083 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_006327-T1 -3.436352 hypothetical protein NA NA NA NA IPR018466 Yeast cell wall synthesis Kre9/Knh1-like, N-terminal PF10342 NA FALSE
    XANPAGTX0501_004930-T1 -3.341143 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_002446-T1 -3.286528 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_003765-T1 -3.256318 hypothetical protein NA NA NA GO_component: GO:0005618 - cell wall [Evidence IEA], GO_function: GO:0005199 - structural constituent of cell wall [Evidence IEA] IPR000420 Yeast PIR protein repeat PF00399 NA FALSE
    XANPAGTX0501_005256-T1 -3.224319 hypothetical protein AA7 C:(C) Energy production and conversion ENOG503NV4F GO_function: GO:0016491 - oxidoreductase activity [Evidence IEA], GO_function: GO:0050660 - flavin adenine dinucleotide binding [Evidence IEA], GO_function: GO:0071949 - FAD binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR006094 FAD linked oxidase, N-terminal, IPR012951 Berberine/berberine-like, IPR016166 FAD-binding domain, PCMH-type, IPR036318 FAD-binding, type PCMH-like superfamily PF01565, PF08031 NA TRUE
    XANPAGTX0501_006846-T1 -3.098928 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_010449-T1 -2.870515 hypothetical protein NA O:(O) Posttranslational modification, protein turnover, chaperones ENOG503P0GD GO_function: GO:0004222 - metalloendopeptidase activity [Evidence IEA], GO_function: GO:0008237 - metallopeptidase activity [Evidence IEA], GO_process: GO:0006508 - proteolysis [Evidence IEA] IPR001384 Peptidase M35, deuterolysin, IPR024079 Metallopeptidase, catalytic domain superfamily, IPR029463 Lysine-specific metallo-endopeptidase PF02102, PF14521 M35 FALSE
    XANPAGTX0501_009688-T1 -2.868656 hypothetical protein NA NA NA GO_component: GO:0005576 - extracellular region [Evidence IEA], GO_function: GO:0030248 - cellulose binding [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR000254 Cellulose-binding domain, fungal, IPR035971 Cellulose-binding domain superfamily, IPR036908 RlpA-like domain superfamily PF00734 NA FALSE
    XANPAGTX0501_003270-T1 -2.857225 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_004956-T1 -2.837976 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_005675-T1 -2.825087 hypothetical protein NA O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NZAM GO_function: GO:0008237 - metallopeptidase activity [Evidence IEA] IPR008754 Peptidase M43, pregnancy-associated plasma-A, IPR024079 Metallopeptidase, catalytic domain superfamily PF05572, PF13688 M43B FALSE
    XANPAGTX0501_000163-T1 -2.730902 hypothetical protein NA S:(S) Function unknown ENOG503NXZU NA IPR009078 Ferritin-like superfamily PF13668 NA TRUE
    XANPAGTX0501_009886-T1 -2.690801 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_009668-T1 -2.648950 hypothetical protein NA NA ENOG503P6EY NA NA NA NA FALSE
    XANPAGTX0501_001304-T1 -2.616995 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_006142-T1 -2.552912 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_003572-T1 -2.535693 hypothetical protein CE16 S:(S) Function unknown ENOG503P02M GO_function: GO:0016788 - hydrolase activity, acting on ester bonds [Evidence IEA] IPR001087 GDSL lipase/esterase, IPR036514 SGNH hydrolase superfamily PF00657 NA FALSE
    XANPAGTX0501_009606-T1 -2.529422 hypothetical protein NA NA ENOG503PMNB NA IPR011024 Gamma-crystallin-like NA NA FALSE
    XANPAGTX0501_006291-T1 -2.524612 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_008978-T1 -2.518736 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_005573-T1 -2.506069 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_001458-T1 -2.475919 hypothetical protein NA G:(G) Carbohydrate transport and metabolism, O:(O) Posttranslational modification, protein turnover, chaperones, S:(S) Function unknown ENOG503NVEA, ENOG503P073 NA IPR002889 Carbohydrate-binding WSC, IPR018535 Domain of unknown function DUF1996 PF01822, PF09362 NA FALSE
    XANPAGTX0501_005361-T1 -2.474153 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_007976-T1 -2.346686 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_009683-T1 -2.345949 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_005733-T1 -2.302949 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_010693-T1 -2.297042 hypothetical protein AA7 C:(C) Energy production and conversion ENOG503NVX6 GO_function: GO:0016491 - oxidoreductase activity [Evidence IEA], GO_function: GO:0050660 - flavin adenine dinucleotide binding [Evidence IEA], GO_function: GO:0071949 - FAD binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR006094 FAD linked oxidase, N-terminal, IPR016166 FAD-binding domain, PCMH-type, IPR016167 FAD-binding, type PCMH, subdomain 1, IPR016169 FAD-binding, type PCMH, subdomain 2, IPR036318 FAD-binding, type PCMH-like superfamily PF01565 NA TRUE
    XANPAGTX0501_007272-T1 -2.264531 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_002412-T1 -2.245791 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_010247-T1 -2.240976 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_007515-T1 -2.210135 hypothetical protein NA O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NUDE, ENOG503NV31 GO_function: GO:0004190 - aspartic-type endopeptidase activity [Evidence IEA], GO_process: GO:0006508 - proteolysis [Evidence IEA] IPR001461 Aspartic peptidase A1 family, IPR001969 Aspartic peptidase, active site, IPR021109 Aspartic peptidase domain superfamily, IPR033121 Peptidase family A1 domain, IPR034163 Aspergillopepsin-like catalytic domain PF00026 A01A FALSE
    XANPAGTX0501_005771-T1 -2.208725 hypothetical protein GH5 G:(G) Carbohydrate transport and metabolism ENOG503NYXZ GO_function: GO:0004553 - hydrolase activity, hydrolyzing O-glycosyl compounds [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR001547 Glycoside hydrolase, family 5, IPR017853 Glycoside hydrolase superfamily PF00150 NA FALSE
    XANPAGTX0501_007362-T1 -2.200339 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_010580-T1 -2.189884 hypothetical protein NA G:(G) Carbohydrate transport and metabolism ENOG501RPBK GO_function: GO:0005515 - protein binding [Evidence IEA] IPR011048 Cytochrome cd1-nitrite reductase-like, haem d1 domain superfamily, IPR015943 WD40/YVTN repeat-like-containing domain superfamily, IPR019405 Lactonase, 7-bladed beta propeller PF10282 NA FALSE
    XANPAGTX0501_004978-T1 -2.183515 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_001708-T1 -2.176671 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_009730-T1 -2.155593 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_009615-T1 -2.152340 hypothetical protein NA NA NA NA IPR035940 CAP superfamily NA NA FALSE
    XANPAGTX0501_003873-T1 -2.109258 hypothetical protein GH25 G:(G) Carbohydrate transport and metabolism ENOG503P1T2 GO_function: GO:0003796 - lysozyme activity [Evidence IEA], GO_process: GO:0009253 - peptidoglycan catabolic process [Evidence IEA], GO_process: GO:0016998 - cell wall macromolecule catabolic process [Evidence IEA] IPR002053 Glycoside hydrolase, family 25, IPR017853 Glycoside hydrolase superfamily, IPR018077 Glycoside hydrolase, family 25 subgroup PF01183 NA FALSE
    XANPAGTX0501_004880-T1 -2.100330 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_002331-T1 -2.094181 hypothetical protein GH5 G:(G) Carbohydrate transport and metabolism ENOG503NY10 GO_function: GO:0004553 - hydrolase activity, hydrolyzing O-glycosyl compounds [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR001547 Glycoside hydrolase, family 5, IPR017853 Glycoside hydrolase superfamily, IPR018087 Glycoside hydrolase, family 5, conserved site PF00150 NA FALSE
    XANPAGTX0501_007467-T1 -2.047189 hypothetical protein CE3 G:(G) Carbohydrate transport and metabolism, S:(S) Function unknown ENOG503NXZG, ENOG503P4E6 NA IPR013830 SGNH hydrolase-type esterase domain, IPR036514 SGNH hydrolase superfamily PF00657, PF13472, PF13517 NA FALSE
    XANPAGTX0501_008041-T1 -2.018648 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_010250-T1 -1.956382 hypothetical protein NA NA ENOG503P3M4 NA NA NA NA FALSE
    XANPAGTX0501_004329-T1 -1.953200 hypothetical protein NA NA NA NA IPR020915 Uncharacterised protein family UPF0311 PF11578 NA TRUE
    XANPAGTX0501_008875-T1 -1.934027 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_009699-T1 -1.923092 hypothetical protein GH16 G:(G) Carbohydrate transport and metabolism, O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NZM7, ENOG503PDKW GO_function: GO:0004553 - hydrolase activity, hydrolyzing O-glycosyl compounds [Evidence IEA], GO_function: GO:0008061 - chitin binding [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR000757 Glycoside hydrolase family 16, IPR001002 Chitin-binding, type 1, IPR008264 Beta-glucanase, IPR013320 Concanavalin A-like lectin/glucanase domain superfamily, IPR036861 Endochitinase-like superfamily PF00722 NA FALSE
    XANPAGTX0501_010470-T1 -1.921148 hypothetical protein GH24 O:(O) Posttranslational modification, protein turnover, chaperones ENOG503P2E0, ENOG503P2RE GO_function: GO:0003796 - lysozyme activity [Evidence IEA], GO_process: GO:0009253 - peptidoglycan catabolic process [Evidence IEA], GO_process: GO:0016998 - cell wall macromolecule catabolic process [Evidence IEA], GO_process: GO:0019076 - viral release from host cell [Evidence IEA], GO_process: GO:0019835 - cytolysis [Evidence IEA] IPR002196 Glycoside hydrolase, family 24, IPR009045 Hedgehog signalling/DD-peptidase zinc-binding domain superfamily, IPR023346 Lysozyme-like domain superfamily, IPR033907 Endolysin/autolysin, IPR034690 Endolysin T4 type PF00959 NA K01185 FALSE
    XANPAGTX0501_000609-T1 -1.909619 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_006349-T1 -1.876403 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_000051-T1 -1.859342 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_002665-T1 -1.843112 hypothetical protein NA NA NA GO_function: GO:0008237 - metallopeptidase activity [Evidence IEA] IPR024079 Metallopeptidase, catalytic domain superfamily NA NA TRUE
    XANPAGTX0501_001832-T1 -1.829702 hypothetical protein GH128 NA NA NA IPR017853 Glycoside hydrolase superfamily, IPR024655 Asl1-like, glycosyl hydrolase catalytic domain PF11790 NA TRUE
    XANPAGTX0501_004372-T1 -1.826518 hypothetical protein NA G:(G) Carbohydrate transport and metabolism, T:(T) Signal transduction mechanisms ENOG503NV5A, ENOG503P0DZ NA IPR002018 Carboxylesterase, type B, IPR019819 Carboxylesterase type B, conserved site, IPR019826 Carboxylesterase type B, active site, IPR029058 Alpha/Beta hydrolase fold PF00135, PF20434 S09X FALSE
    XANPAGTX0501_010022-T1 -1.809156 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_000923-T1 -1.786540 hypothetical protein NA P:(P) Inorganic ion transport and metabolism ENOG503NV06 GO_function: GO:0003993 - acid phosphatase activity [Evidence IEA], GO_function: GO:0046872 - metal ion binding [Evidence IEA] IPR008963 Purple acid phosphatase-like, N-terminal, IPR018946 PhoD-like phosphatase, metallophosphatase domain, IPR032093 Phospholipase D, N-terminal, IPR038607 PhoD-like superfamily PF09423, PF16655 NA K01113 FALSE
    XANPAGTX0501_003361-T1 -1.748190 hypothetical protein NA NA ENOG503P519 NA NA NA NA FALSE
    XANPAGTX0501_009511-T1 -1.745629 hypothetical protein GH16 G:(G) Carbohydrate transport and metabolism, O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NZM7, ENOG503PDKW GO_function: GO:0004553 - hydrolase activity, hydrolyzing O-glycosyl compounds [Evidence IEA], GO_function: GO:0008061 - chitin binding [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR000757 Glycoside hydrolase family 16, IPR001002 Chitin-binding, type 1, IPR008264 Beta-glucanase, IPR013320 Concanavalin A-like lectin/glucanase domain superfamily, IPR036861 Endochitinase-like superfamily PF00722 NA K01216 FALSE
    XANPAGTX0501_008019-T1 -1.720017 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_002191-T1 -1.708273 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_006967-T1 -1.686952 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_005715-T1 -1.646345 hypothetical protein AA7 C:(C) Energy production and conversion, O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NXN6, ENOG503P15H GO_function: GO:0016491 - oxidoreductase activity [Evidence IEA], GO_function: GO:0050660 - flavin adenine dinucleotide binding [Evidence IEA], GO_function: GO:0071949 - FAD binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR006094 FAD linked oxidase, N-terminal, IPR012951 Berberine/berberine-like, IPR016166 FAD-binding domain, PCMH-type, IPR036318 FAD-binding, type PCMH-like superfamily PF01565, PF08031 NA FALSE
    XANPAGTX0501_006283-T1 -1.624636 hypothetical protein NA NA NA NA IPR000772 Ricin B, lectin domain, IPR035992 Ricin B-like lectins NA NA TRUE
    XANPAGTX0501_008270-T1 -1.619871 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_005641-T1 -1.614221 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_010366-T1 -1.609971 hypothetical protein AA9 O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NYCR, ENOG503Q4YD GO_function: GO:0003676 - nucleic acid binding [Evidence IEA], GO_function: GO:0008061 - chitin binding [Evidence IEA] IPR001002 Chitin-binding, type 1, IPR005103 Auxiliary Activity family 9, IPR013087 Zinc finger C2H2-type, IPR018371 Chitin-binding, type 1, conserved site, IPR036861 Endochitinase-like superfamily PF00187, PF03443 NA FALSE
    XANPAGTX0501_000467-T1 -1.600285 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_008622-T1 -1.585245 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_004346-T1 -1.568986 hypothetical protein NA S:(S) Function unknown ENOG503NUT8 NA IPR027589 Choice-of-anchor B domain NA NA FALSE
    XANPAGTX0501_006257-T1 -1.562667 hypothetical protein NA G:(G) Carbohydrate transport and metabolism ENOG503NW21 GO_function: GO:0004553 - hydrolase activity, hydrolyzing O-glycosyl compounds [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR000490 Glycoside hydrolase family 17, IPR017853 Glycoside hydrolase superfamily PF00332 NA K01210 FALSE
    XANPAGTX0501_007939-T1 -1.548201 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_010021-T1 -1.539151 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_009026-T1 -1.512207 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_003287-T1 -1.493602 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_002186-T1 -1.483447 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_006194-T1 -1.466337 hypothetical protein NA O:(O) Posttranslational modification, protein turnover, chaperones ENOG503P7ES NA IPR036908 RlpA-like domain superfamily NA NA TRUE
    XANPAGTX0501_004515-T1 -1.449838 hypothetical protein NA NA NA NA IPR032710 NTF2-like domain superfamily, IPR037401 SnoaL-like domain PF13577 NA FALSE
    XANPAGTX0501_010040-T1 -1.448217 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_010027-T1 -1.444367 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_009972-T1 -1.430084 hypothetical protein NA O:(O) Posttranslational modification, protein turnover, chaperones ENOG503Q3UA GO_function: GO:0004252 - serine-type endopeptidase activity [Evidence IEA], GO_function: GO:0008236 - serine-type peptidase activity [Evidence IEA], GO_process: GO:0006508 - proteolysis [Evidence IEA] IPR000209 Peptidase S8/S53 domain, IPR015366 Peptidase S53, activation domain, IPR030400 Sedolisin domain, IPR036852 Peptidase S8/S53 domain superfamily PF00082, PF09286 S53 TRUE
    XANPAGTX0501_001919-T1 -1.411388 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_000162-T1 -1.389788 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_003766-T1 -1.385254 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_003448-T1 -1.383005 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_009936-T1 -1.379267 hypothetical protein AA5, CBM32 O:(O) Posttranslational modification, protein turnover, chaperones, S:(S) Function unknown ENOG502I4WM, ENOG503NY8H GO_function: GO:0005515 - protein binding [Evidence IEA] IPR000421 Coagulation factor 5/8 C-terminal domain, IPR006652 Kelch repeat type 1, IPR008979 Galactose-binding-like domain superfamily, IPR011043 Galactose oxidase/kelch, beta-propeller, IPR013783 Immunoglobulin-like fold, IPR014756 Immunoglobulin E-set, IPR015202 Galactose oxidase-like, Early set domain, IPR037293 Galactose oxidase, central domain superfamily PF00754, PF01344, PF09118 NA FALSE
    XANPAGTX0501_008146-T1 -1.360688 hypothetical protein NA E:(E) Amino acid transport and metabolism, O:(O) Posttranslational modification, protein turnover, chaperones ENOG503P41K GO_function: GO:0004185 - serine-type carboxypeptidase activity [Evidence IEA], GO_process: GO:0006508 - proteolysis [Evidence IEA] IPR001563 Peptidase S10, serine carboxypeptidase, IPR018202 Serine carboxypeptidase, serine active site, IPR029058 Alpha/Beta hydrolase fold PF00450 S10 K01288 FALSE
    XANPAGTX0501_010026-T1 -1.349396 hypothetical protein NA S:(S) Function unknown ENOG503P0BZ NA IPR018392 LysM domain, IPR036779 LysM domain superfamily PF01476 NA FALSE
    XANPAGTX0501_001455-T1 -1.338580 Sperm-associated antigen 4 protein GH132 O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NUJ1 NA IPR005556 SUN family PF03856 NA K01238 FALSE
    XANPAGTX0501_002315-T2 -1.329521 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_009690-T1 -1.325638 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_007280-T1 -1.317948 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_004983-T1 -1.308098 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_005133-T1 -1.291558 hypothetical protein NA S:(S) Function unknown ENOG503NXPR NA IPR039535 Arylsulfotransferase-like PF05935, PF14269 NA FALSE
    XANPAGTX0501_000606-T1 -1.268241 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_007268-T1 -1.263293 hypothetical protein AA7 C:(C) Energy production and conversion, S:(S) Function unknown ENOG503NXRX, ENOG503PBAU GO_function: GO:0016491 - oxidoreductase activity [Evidence IEA], GO_function: GO:0050660 - flavin adenine dinucleotide binding [Evidence IEA], GO_function: GO:0071949 - FAD binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR006094 FAD linked oxidase, N-terminal, IPR012951 Berberine/berberine-like, IPR016166 FAD-binding domain, PCMH-type, IPR016167 FAD-binding, type PCMH, subdomain 1, IPR016169 FAD-binding, type PCMH, subdomain 2, IPR036318 FAD-binding, type PCMH-like superfamily PF01565, PF08031 NA TRUE
    XANPAGTX0501_004065-T1 -1.250886 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_006382-T1 -1.241749 hypothetical protein NA NA ENOG503P53I NA NA NA NA FALSE
    XANPAGTX0501_005717-T1 -1.235652 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_008990-T1 -1.228845 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_009689-T1 -1.216985 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_009731-T1 -1.215351 hypothetical protein GH71 O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NVTC GO_function: GO:0016787 - hydrolase activity [Evidence IEA] IPR005197 Glycoside hydrolase family 71 PF03659 NA K08254 FALSE
    XANPAGTX0501_008429-T1 -1.195150 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_001183-T1 -1.191723 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_000737-T1 -1.183761 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_010624-T1 -1.176356 hypothetical protein NA J:(J) Translation, ribosomal structure and biogenesis ENOG503NVR9 NA IPR023631 Amidase signature domain, IPR036928 Amidase signature (AS) superfamily NA NA FALSE
    XANPAGTX0501_003606-T1 -1.168749 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_005629-T1 -1.163876 hypothetical protein NA S:(S) Function unknown ENOG503NY4Y GO_function: GO:0008081 - phosphoric diester hydrolase activity [Evidence IEA], GO_process: GO:0006629 - lipid metabolic process [Evidence IEA] IPR017946 PLC-like phosphodiesterase, TIM beta/alpha-barrel domain superfamily, IPR030395 Glycerophosphodiester phosphodiesterase domain PF03009 NA FALSE
    XANPAGTX0501_002565-T1 -1.159972 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_006751-T1 -1.137824 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_010682-T1 -1.134135 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_000697-T1 -1.131109 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_002041-T1 -1.127546 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_006292-T1 -1.107115 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_010443-T1 -1.107115 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_005317-T1 -1.102348 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_002543-T1 -1.098046 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_006860-T1 -1.097166 hypothetical protein NA NA NA NA IPR008979 Galactose-binding-like domain superfamily NA NA FALSE
    XANPAGTX0501_007655-T1 -1.094363 hypothetical protein NA S:(S) Function unknown ENOG503NV1P GO_function: GO:0008081 - phosphoric diester hydrolase activity [Evidence IEA], GO_process: GO:0006629 - lipid metabolic process [Evidence IEA] IPR017946 PLC-like phosphodiesterase, TIM beta/alpha-barrel domain superfamily NA NA FALSE
    XANPAGTX0501_001970-T1 -1.089366 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_009607-T1 -1.069439 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_010370-T1 -1.066233 hypothetical protein NA NA NA NA IPR003609 PAN/Apple domain PF00024 NA FALSE
    XANPAGTX0501_002412-T2 -1.065097 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_003285-T1 -1.064411 hypothetical protein NA O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NV2A GO_function: GO:0008236 - serine-type peptidase activity [Evidence IEA], GO_process: GO:0006508 - proteolysis [Evidence IEA] IPR008758 Peptidase S28, IPR029058 Alpha/Beta hydrolase fold, IPR042269 Serine carboxypeptidase S28, SKS domain NA S28 K09649 FALSE
    XANPAGTX0501_000434-T1 -1.045784 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_003186-T1 -1.045733 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_009011-T1 -1.036787 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_002514-T1 -1.030316 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_008683-T1 -1.021294 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_009311-T1 -1.019264 hypothetical protein NA NA NA NA IPR008427 Extracellular membrane protein, CFEM domain PF05730 NA TRUE
    XANPAGTX0501_008027-T1 -1.017996 hypothetical protein NA S:(S) Function unknown ENOG503NZ6V GO_function: GO:0004601 - peroxidase activity [Evidence IEA] IPR000028 Chloroperoxidase, IPR036851 Chloroperoxidase-like superfamily PF01328 NA FALSE
    XANPAGTX0501_003124-T1 -1.008637 hypothetical protein NA O:(O) Posttranslational modification, protein turnover, chaperones ENOG503P2G1 GO_function: GO:0004190 - aspartic-type endopeptidase activity [Evidence IEA], GO_process: GO:0006508 - proteolysis [Evidence IEA] IPR001461 Aspartic peptidase A1 family, IPR001969 Aspartic peptidase, active site, IPR021109 Aspartic peptidase domain superfamily, IPR033121 Peptidase family A1 domain, IPR034164 Pepsin-like domain PF00026 A01A FALSE
    • All secreted proteins upregulated in culture, arranged by b value. Mostly enzymes
      • Most strongly upregulated are AA7, AA5, GH128 and galactose-binding enzymes
    secreted_table_culture<-secreted_sig %>% 
      filter(change=="culture") %>% left_join(funannot2,by = c("target_id"="TranscriptID")) %>% select(target_id,b,Product,CAZyme_new,COG_new,EggNog_new,GO.Terms_new,InterPro_new,PFAM_new,Protease_new,KO,lichen_ortho) %>%
      arrange(desc(b))
    secreted_table_culture %>%
      kable(format = "html", col.names = colnames(secreted_table_culture)) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    target_id b Product CAZyme_new COG_new EggNog_new GO.Terms_new InterPro_new PFAM_new Protease_new KO lichen_ortho
    XANPAGTX0501_005691-T1 5.859572 hypothetical protein AA7 C:(C) Energy production and conversion ENOG503NVX6 GO_function: GO:0016491 - oxidoreductase activity [Evidence IEA], GO_function: GO:0050660 - flavin adenine dinucleotide binding [Evidence IEA], GO_function: GO:0071949 - FAD binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR006094 FAD linked oxidase, N-terminal, IPR016166 FAD-binding domain, PCMH-type, IPR016167 FAD-binding, type PCMH, subdomain 1, IPR016169 FAD-binding, type PCMH, subdomain 2, IPR036318 FAD-binding, type PCMH-like superfamily PF01565 NA TRUE
    XANPAGTX0501_005118-T1 5.651733 hypothetical protein NA NA ENOG503PWV2, ENOG503PYSR NA NA NA NA TRUE
    XANPAGTX0501_006848-T1 3.980717 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_008294-T1 3.732327 hypothetical protein AA5, CBM32 O:(O) Posttranslational modification, protein turnover, chaperones, S:(S) Function unknown ENOG502I4WM, ENOG503NY8H GO_function: GO:0005515 - protein binding [Evidence IEA] IPR000421 Coagulation factor 5/8 C-terminal domain, IPR006652 Kelch repeat type 1, IPR008979 Galactose-binding-like domain superfamily, IPR011043 Galactose oxidase/kelch, beta-propeller, IPR013783 Immunoglobulin-like fold, IPR014756 Immunoglobulin E-set, IPR015202 Galactose oxidase-like, Early set domain, IPR037293 Galactose oxidase, central domain superfamily PF00754, PF01344, PF09118 NA FALSE
    XANPAGTX0501_010474-T1 3.724444 hypothetical protein NA NA NA NA IPR008979 Galactose-binding-like domain superfamily NA NA FALSE
    XANPAGTX0501_003863-T2 3.301034 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_000306-T1 3.231592 hypothetical protein NA S:(S) Function unknown ENOG503P21A NA IPR021851 Protein of unknown function DUF3455 PF11937 NA FALSE
    XANPAGTX0501_006869-T1 3.075100 hypothetical protein GH128 NA NA NA IPR008979 Galactose-binding-like domain superfamily, IPR017853 Glycoside hydrolase superfamily, IPR024655 Asl1-like, glycosyl hydrolase catalytic domain PF11790 NA FALSE
    XANPAGTX0501_005759-T1 2.880832 hypothetical protein NA NA ENOG503P558 NA NA NA NA FALSE
    XANPAGTX0501_009761-T1 2.701300 hypothetical protein NA S:(S) Function unknown ENOG503NZQD GO_component: GO:0005576 - extracellular region [Evidence IEA] IPR001283 Cysteine-rich secretory protein-related, IPR014044 CAP domain, IPR018244 Allergen V5/Tpx-1-related, conserved site, IPR034120 None, IPR035940 CAP superfamily PF00188 NA K20412 FALSE
    XANPAGTX0501_000850-T1 2.534878 hypothetical protein NA NA ENOG503P7C3 NA NA NA NA FALSE
    XANPAGTX0501_006926-T1 2.528494 hypothetical protein NA NA NA NA NA NA NA TRUE
    XANPAGTX0501_009613-T1 2.515673 hypothetical protein NA S:(S) Function unknown ENOG503NZZ0 NA IPR000073 Alpha/beta hydrolase fold-1, IPR029058 Alpha/Beta hydrolase fold PF12697 NA TRUE
    XANPAGTX0501_010126-T2 2.407552 hypothetical protein NA NA NA GO_function: GO:0008237 - metallopeptidase activity [Evidence IEA] IPR024079 Metallopeptidase, catalytic domain superfamily NA NA FALSE
    XANPAGTX0501_009170-T1 2.253664 hypothetical protein GH45 G:(G) Carbohydrate transport and metabolism ENOG503P08U, ENOG503Q3X3 GO_function: GO:0008810 - cellulase activity [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR000334 Glycoside hydrolase, family 45, IPR036908 RlpA-like domain superfamily PF02015 NA FALSE
    XANPAGTX0501_001974-T1 2.218747 hypothetical protein AA7 C:(C) Energy production and conversion ENOG503NWXH GO_function: GO:0016491 - oxidoreductase activity [Evidence IEA], GO_function: GO:0050660 - flavin adenine dinucleotide binding [Evidence IEA], GO_function: GO:0071949 - FAD binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR006094 FAD linked oxidase, N-terminal, IPR012951 Berberine/berberine-like, IPR016166 FAD-binding domain, PCMH-type, IPR016167 FAD-binding, type PCMH, subdomain 1, IPR016169 FAD-binding, type PCMH, subdomain 2, IPR036318 FAD-binding, type PCMH-like superfamily PF01565, PF08031 NA TRUE
    XANPAGTX0501_010307-T1 1.956031 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_003850-T1 1.889111 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_009609-T1 1.620965 hypothetical protein NA I:(I) Lipid transport and metabolism, S:(S) Function unknown ENOG503NZ1V GO_function: GO:0004806 - triglyceride lipase activity [Evidence IEA], GO_process: GO:0016042 - lipid catabolic process [Evidence IEA] IPR000073 Alpha/beta hydrolase fold-1, IPR005152 Lipase, secreted, IPR022742 Serine aminopeptidase, S33, IPR029058 Alpha/Beta hydrolase fold PF03583, PF12146, PF12697 NA TRUE
    XANPAGTX0501_010442-T1 1.618450 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_005664-T1 1.551288 hypothetical protein GH128 NA NA NA IPR008979 Galactose-binding-like domain superfamily, IPR017853 Glycoside hydrolase superfamily, IPR024655 Asl1-like, glycosyl hydrolase catalytic domain PF11790 NA FALSE
    XANPAGTX0501_007937-T1 1.503271 hypothetical protein NA NA ENOG503NVDR GO_function: GO:0005515 - protein binding [Evidence IEA] IPR011044 Quinoprotein amine dehydrogenase, beta chain-like, IPR015943 WD40/YVTN repeat-like-containing domain superfamily NA NA FALSE
    XANPAGTX0501_000401-T1 1.401801 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_007985-T1 1.397612 hypothetical protein NA NA NA GO_function: GO:0008237 - metallopeptidase activity [Evidence IEA] IPR024079 Metallopeptidase, catalytic domain superfamily NA NA FALSE
    XANPAGTX0501_006091-T1 1.369228 hypothetical protein NA NA ENOG503P8K1 NA NA NA NA FALSE
    XANPAGTX0501_009962-T1 1.333092 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_007182-T1 1.319524 alpha,alpha-trehalase ath1 NA G:(G) Carbohydrate transport and metabolism ENOG503NUMQ GO_function: GO:0003824 - catalytic activity [Evidence IEA], GO_function: GO:0030246 - carbohydrate binding [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR005195 Glycoside hydrolase, family 65, central catalytic, IPR005196 Glycoside hydrolase, family 65, N-terminal, IPR008928 Six-hairpin glycosidase superfamily, IPR008979 Galactose-binding-like domain superfamily, IPR011013 Galactose mutarotase-like domain superfamily, IPR012341 Six-hairpin glycosidase-like superfamily, IPR037018 Glycoside hydrolase family 65, N-terminal domain superfamily PF03633, PF03636 NA FALSE
    XANPAGTX0501_010132-T1 1.308632 hypothetical protein GH43 G:(G) Carbohydrate transport and metabolism ENOG503P1F3 GO_function: GO:0004553 - hydrolase activity, hydrolyzing O-glycosyl compounds [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR006710 Glycoside hydrolase, family 43, IPR023296 Glycosyl hydrolase, five-bladed beta-propellor domain superfamily PF04616 NA FALSE
    XANPAGTX0501_005244-T1 1.304550 hypothetical protein AA3 E:(E) Amino acid transport and metabolism ENOG503NU6B, ENOG503NUFD GO_function: GO:0016614 - oxidoreductase activity, acting on CH-OH group of donors [Evidence IEA], GO_function: GO:0050660 - flavin adenine dinucleotide binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR000172 Glucose-methanol-choline oxidoreductase, N-terminal, IPR007867 Glucose-methanol-choline oxidoreductase, C-terminal, IPR012132 Glucose-methanol-choline oxidoreductase, IPR036188 FAD/NAD(P)-binding domain superfamily PF00732, PF05199 NA K00115 FALSE
    XANPAGTX0501_007388-T1 1.269375 hypothetical protein NA S:(S) Function unknown ENOG503P3AN GO_function: GO:0005515 - protein binding [Evidence IEA] IPR002372 Pyrrolo-quinoline quinone repeat, IPR011047 Quinoprotein alcohol dehydrogenase-like superfamily, IPR015943 WD40/YVTN repeat-like-containing domain superfamily, IPR018391 Pyrrolo-quinoline quinone beta-propeller repeat NA NA FALSE
    XANPAGTX0501_006386-T1 1.206100 hypothetical protein NA NA NA NA NA NA NA FALSE
    XANPAGTX0501_002723-T1 1.203666 hypothetical protein NA S:(S) Function unknown ENOG503NXPR NA IPR039535 Arylsulfotransferase-like PF05935, PF14269 NA FALSE
    XANPAGTX0501_006694-T1 1.155895 hypothetical protein NA NA ENOG503P24M NA NA NA NA FALSE
    XANPAGTX0501_010191-T1 1.151281 hypothetical protein CE5 G:(G) Carbohydrate transport and metabolism ENOG503P4Q7 GO_component: GO:0005576 - extracellular region [Evidence IEA], GO_function: GO:0016787 - hydrolase activity [Evidence IEA], GO_function: GO:0050525 - cutinase activity [Evidence IEA] IPR000675 Cutinase/acetylxylan esterase, IPR011150 Cutinase, monofunctional, IPR029058 Alpha/Beta hydrolase fold PF01083 NA FALSE
    XANPAGTX0501_001051-T1 1.077085 hypothetical protein GH17 G:(G) Carbohydrate transport and metabolism ENOG503NXKT NA IPR017853 Glycoside hydrolase superfamily NA NA FALSE
    XANPAGTX0501_009124-T1 1.069091 hypothetical protein NA S:(S) Function unknown ENOG503NVXI, ENOG503P4UQ GO_component: GO:0005618 - cell wall [Evidence IEA], GO_function: GO:0005199 - structural constituent of cell wall [Evidence IEA] IPR000420 Yeast PIR protein repeat PF00399 NA FALSE
    XANPAGTX0501_007482-T1 1.051174 hypothetical protein GH132 S:(S) Function unknown ENOG503NVKV NA IPR005556 SUN family PF03856 NA FALSE
    XANPAGTX0501_003140-T1 1.035102 hypothetical protein AA3 E:(E) Amino acid transport and metabolism ENOG503PBWU GO_function: GO:0016614 - oxidoreductase activity, acting on CH-OH group of donors [Evidence IEA], GO_function: GO:0050660 - flavin adenine dinucleotide binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR000172 Glucose-methanol-choline oxidoreductase, N-terminal, IPR007867 Glucose-methanol-choline oxidoreductase, C-terminal, IPR012132 Glucose-methanol-choline oxidoreductase, IPR036188 FAD/NAD(P)-binding domain superfamily PF00732, PF05199 NA TRUE
    XANPAGTX0501_009902-T1 1.034038 hypothetical protein NA P:(P) Inorganic ion transport and metabolism ENOG503NY8E NA IPR001148 Alpha carbonic anhydrase domain, IPR036398 Alpha carbonic anhydrase domain superfamily, IPR041891 Carbonic anhydrase, prokaryotic-like PF00194 NA K01672 FALSE
    XANPAGTX0501_010393-T1 1.019087 hypothetical protein NA NA NA NA NA NA NA FALSE

    4.9. Putative NLRs and other self/non-self recognition proteins

    Putative NLRs

    • Got the list of putative NLRs
    nlr_list<-read.delim2("../../02_mycobiont_genome/analysis_and_temp_files/10_nlr/NLRs.txt")
    • Four upregulated in lichen, one in cluture
    nlr_sig<-sig %>% filter(target_id %in% nlr_list$TranscriptID)
    nlr_sig %>% group_by(change) %>% summarise(n=n())
    ## # A tibble: 3 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture       1
    ## 2 lichen        4
    ## 3 low_logFC     9
    • For all lichen-upregulated, we can see they are expressed in culture too
    nlr_lichen<-nlr_sig$target_id[nlr_sig$change=="lichen"]
    plot_bootstrap(so, 
                   target_id = nlr_lichen[1], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = nlr_lichen[2], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = nlr_lichen[3], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = nlr_lichen[4], 
                   units = "est_counts", 
                   color_by = "condition")

    • What kind of NLRs are they?
      • XANPAGTX0501_005494-T1: NACHT + Ankyrin domains
      • XANPAGTX0501_002585-T1: HeLo + NACHT + Ankyrin domains **This is promising: “The HeLo domain of HET-S represents the cell death execution domain in the het-s/het-S system” quote from Daskalov et al. 2012
      • XANPAGTX0501_005102-T1: Nucleoside Phosphotase + NACHT + Ankyrin domains
      • XANPAGTX0501_010687-T1: NACHT + Ankyrin domains
    • The one that is culture-upregulated, is inconsistent and is highly-expressed in most lichen samples too
    plot_bootstrap(so, 
                   target_id = nlr_sig$target_id[nlr_sig$change=="culture"], 
                   units = "est_counts", 
                   color_by = "condition")

    • The protein is NB-ARC + TRP

    Broader set of NLR-like proteins

    • Got the list of NLR-like proteins, including those without a complete set of domains
    nlr_like<-read.delim2("../../02_mycobiont_genome/analysis_and_temp_files/10_nlr/NLR_like.txt")
    • 12 upregulated in lichen, 3 in cluture
    nlrlike_sig<-sig %>% filter(target_id %in% nlr_like$TranscriptID) %>% arrange(b)
    nlrlike_sig %>% group_by(change) %>% summarise(n=n())
    ## # A tibble: 3 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture       3
    ## 2 lichen       12
    ## 3 low_logFC    14
    • Some lichen-upregulated are very strongly upregulated
      • XANPAGTX0501_007753-T1 is not expressed in culture but strongly expressed in nearly all lichen samples. It has one P-loop domain
      • XANPAGTX0501_010570-T1 is not expressed in culture but strongly expressed in nearly all lichen samples. It has P-loop domain + TPR
      • XANPAGTX0501_002436-T1 is expressed in both, but clearly is lichen-upregulated. It has STAND + NACHT domains (STAND function is unknown)
    plot_bootstrap(so, 
                   target_id = nlrlike_sig$target_id[1], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = nlrlike_sig$target_id[2], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = nlrlike_sig$target_id[3], 
                   units = "est_counts", 
                   color_by = "condition")

    Other potential self/non-self recognition proteins

    • In total, the genome has 73 proteins with a HET or HaLo domain. Of them, 16 are from lichen-enriched orthogroups
    funannot2 %>% filter(grepl("IPR038305",InterPro_new) | 
                           grepl("IPR029498",InterPro_new) | 
                           grepl("IPR010730",InterPro_new)) %>% 
      group_by(lichen_ortho) %>% summarize(n=n())
    ## # A tibble: 2 × 2
    ##   lichen_ortho     n
    ##   <lgl>        <int>
    ## 1 FALSE           57
    ## 2 TRUE            16
    • Lichen and culture both have some upregulated
    het_sig<-funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("IPR038305",InterPro_new) | 
                           grepl("IPR029498",InterPro_new) | 
                           grepl("IPR010730",InterPro_new)) %>% arrange(b)
    het_sig %>% group_by(change) %>% summarize(n=n())
    ## # A tibble: 4 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture       6
    ## 2 lichen       14
    ## 3 low_logFC    24
    ## 4 <NA>         29
    • Here only showing those proteins not discussed above as NLRs or NLR-like
    het_sig2<-het_sig %>% filter(!(TranscriptID %in% nlrlike_sig$target_id),!is.na(b)) 
    het_sig2 %>% group_by(change) %>% summarize(n=n())
    ## # A tibble: 3 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture       5
    ## 2 lichen       10
    ## 3 low_logFC    20
    • Visualize most lichen-upregulated
      • Top two have only HET domain
      • The third one has HeLo domains and protein kinase domains
    plot_bootstrap(so, 
                   target_id = het_sig2$TranscriptID[1], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = het_sig2$TranscriptID[2], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = het_sig2$TranscriptID[3], 
                   units = "est_counts", 
                   color_by = "condition")

    * Visualize most culture-upregulated * For first and third, expression in lichens is very non-uniform * Second looks more regular

    plot_bootstrap(so, 
                   target_id = het_sig2$TranscriptID[nrow(het_sig2)], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = het_sig2$TranscriptID[nrow(het_sig2)-1], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = het_sig2$TranscriptID[nrow(het_sig2)-2], 
                   units = "est_counts", 
                   color_by = "condition")

    4.10. Protein ubiquitination

    • In addition to already pictured XANPAGTX0501_001643-T1, I wanted to visualize other F-box proteins - primarily to check that they are upregulated among all lichen samples, not just apothecia
      • All seem good, inluding those with the b value most close to the threshold
    fbox<-sig2 %>% filter(grepl("F-box",InterPro_new),change!="low_logFC")
    plot_bootstrap(so, 
                   target_id = fbox$target_id[10], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = fbox$target_id[9], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = fbox$target_id[4], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = fbox$target_id[5], 
                   units = "est_counts", 
                   color_by = "condition")
    
    poz<-sig2 %>% filter(grepl("BTB/POZ",InterPro_new),change!="low_logFC")
    plot_bootstrap(so, 
                   target_id = poz$target_id[3], 
                   units = "est_counts", 
                   color_by = "condition")

    4.11. RNAi and RNA binding

    • Similarly, wanted to check that they are upregulated among all lichen samples, not just apothecia
      • again, all is good
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_002123-T1", 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_003775-T1", 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_008184-T1", 
                   units = "est_counts", 
                   color_by = "condition")

    4.12. Apothecia-upregulated transporter

    • wanted to check thow looks the profile of the inositol and sugar transporters that is upregulated in apothecia, but not in lichen compared to culture. The stats look legit ( althought the degree of variation for the inositol transporter in the culture is peculiar)
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_001825-T1", 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_009737-T1", 
                   units = "est_counts", 
                   color_by = "condition")

    4.13. TFs

    • Are there any patterns in which TFs are upregulated where? NB: this table have duplicates, i.e. if a protein has two TF domains it’s counted twice
      • “Transcription factor domain, fungi” is a domain that mostly occurs in proteins also classified as C2H2 or Zn2C6
    mult_gene_table %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(Function=="Transcription factors") %>%
      group_by(change,Description) %>% summarize(n=n()) %>%
      pivot_wider(names_from = change,values_from = n,values_fill=0) %>%
      filter(culture!=0 | lichen!=0)
    ## `summarise()` has grouped output by 'change'. You can override using the
    ## `.groups` argument.
    ## # A tibble: 9 × 5
    ##   Description                                     culture lichen low_logFC  `NA`
    ##   <chr>                                             <int>  <int>     <int> <int>
    ## 1 Basic-leucine zipper domain                           1      0        10     7
    ## 2 DNA binding HTH domain, Psq-type                      1      0         0     0
    ## 3 Fungal transcription factor                           3      1         7     9
    ## 4 Helix-loop-helix DNA-binding domain superfamily       1      0         0     6
    ## 5 Zinc finger C2H2-type                                 1      9        36    41
    ## 6 Zinc finger, RING/FYVE/PHD-type                       1      4        41    60
    ## 7 Zn(2)-C6 fungal-type DNA-binding domain superf…       9      6        37    37
    ## 8 Homeobox domain                                       0      1         4     0
    ## 9 Transcription factor domain, fungi                    0      2        21    21
    • Look at Zinc finger C2H2-type specifically
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>% 
      filter(grepl("IPR013087",InterPro),change %in% c("lichen","culture")) %>%
      select(TranscriptID,b,change,Product,InterPro) %>%
    kable(format = "html", col.names = c("TranscriptID","b","change","product","InterPro")) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    TranscriptID b change product InterPro
    XANPAGTX0501_000420-T1 -1.619168 lichen hypothetical protein IPR013087 Zinc finger C2H2-type
    XANPAGTX0501_000984-T1 -3.164451 lichen hypothetical protein IPR002151 Kinesin light chain;IPR011990 Tetratricopeptide-like helical domain superfamily;IPR013026 None;IPR013087 Zinc finger C2H2-type;IPR019734 Tetratricopeptide repeat
    XANPAGTX0501_001247-T1 -2.397327 lichen hypothetical protein IPR013087 Zinc finger C2H2-type;IPR036236 Zinc finger C2H2 superfamily
    XANPAGTX0501_001515-T1 -1.263312 lichen hypothetical protein IPR013087 Zinc finger C2H2-type
    XANPAGTX0501_006931-T1 -1.314529 lichen hypothetical protein IPR013087 Zinc finger C2H2-type
    XANPAGTX0501_007754-T1 -2.683149 lichen hypothetical protein IPR013087 Zinc finger C2H2-type;IPR019447 DNA/RNA-binding protein Kin17, WH-like domain;IPR036236 Zinc finger C2H2 superfamily;IPR038254 DNA/RNA-binding protein KIN17, WH-like domain superfamily
    • Which TFs are more common in the genome in general?
    t<-mult_gene_table %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
       filter(Function=="Transcription factors") %>% 
      group_by(change,Description) %>% summarize(n=n()) %>%
      pivot_wider(names_from = change,values_from = n,values_fill=0)
    ## `summarise()` has grouped output by 'change'. You can override using the
    ## `.groups` argument.
    colnames(t)<-c("Description","culture","lichen","low_logFC","Non")
    t %>% mutate(sum=culture+lichen+low_logFC+Non) %>%arrange(desc(sum)) %>%
    kable(format = "html", col.names = c("Description","culture","lichen","low_logFC","Non","sum")) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    Description culture lichen low_logFC Non sum
    Zinc finger, RING/FYVE/PHD-type 1 4 41 60 106
    Zn(2)-C6 fungal-type DNA-binding domain superfamily 9 6 37 37 89
    Zinc finger C2H2-type 1 9 36 41 87
    Transcription factor domain, fungi 0 2 21 21 44
    Fungal transcription factor 3 1 7 9 20
    Basic-leucine zipper domain 1 0 10 7 18
    Fork head domain 0 0 5 4 9
    Myb domain 0 0 4 4 8
    Helix-loop-helix DNA-binding domain superfamily 1 0 0 6 7
    Zinc finger, GATA-type 0 0 3 4 7
    Transcription factor CBF/NF-Y/archaeal histone domain 0 0 3 3 6
    Homeobox domain 0 1 4 0 5
    HTH APSES-type DNA-binding domain superfamily 0 0 2 3 5
    Zinc finger, TFIIS-type 0 0 1 4 5
    Heat shock factor (HSF)-type, DNA-binding 0 0 1 2 3
    Zinc finger, MIZ-type 0 0 1 2 3
    Zinc finger, NF-X1-type 0 0 3 0 3
    Nuclear transcription factor Y subunit A 0 0 2 0 2
    PAS fold 0 0 1 1 2
    Transcription Factor IIF, Rap30/Rap74, interaction 0 0 1 1 2
    Transcription factor TFIIB 0 0 1 1 2
    Transcription factor, MADS-box 0 0 2 0 2
    p53-like transcription factor, DNA-binding domain superfamily 0 0 1 1 2
    DNA-binding RFX-type winged-helix domain 0 0 0 2 2
    Transcription factor IIIC subunit 5, HTH domain 0 0 0 2 2
    Transcription factor IIS, N-termina 0 0 0 2 2
    DNA binding HTH domain, Psq-type 1 0 0 0 1
    CP2 transcription factor 0 0 1 0 1
    NDT80 DNA-binding domain superfamily 0 0 1 0 1
    SNU66/SART1 family 0 0 1 0 1
    TFIIH C1-like domain 0 0 1 0 1
    Transcription factor IIIC, 90kDa subunit, N-terminal 0 0 1 0 1
    Transcription factor Opi1 0 0 1 0 1
    Transcription factor TFIIIB component B, Myb domain 0 0 1 0 1
    Transcription factor, NFYB/HAP3, conserved site 0 0 1 0 1
    Transcription initiation factor IIA, gamma subunit 0 0 1 0 1
    Zinc finger, BED-type 0 0 1 0 1
    Zinc finger, C5HC2-type 0 0 1 0 1
    Zinc finger, GRF-type 0 0 1 0 1
    Apoptosis-antagonizing transcription factor, C-terminal 0 0 0 1 1
    CCR4-Not complex component, Not1, C-terminal 0 0 0 1 1
    DDT domain 0 0 0 1 1
    General transcription factor TFIIH, subunit Tfb6 0 0 0 1 1
    TEA/ATTS domain 0 0 0 1 1
    Transcription factor IIA, alpha/beta subunit 0 0 0 1 1
    Transcription factor Nrm1/Whi5 0 0 0 1 1
    Transcription factor Spt20 0 0 0 1 1
    Transcription factor Ste12 0 0 0 1 1
    Transcription factor TFIID, subunit 8, C-terminal 0 0 0 1 1
    Transcription factor TFIIE beta subunit, DNA-binding domain 0 0 0 1 1
    Transcription factor TFIIH subunit p52/Tfb2 0 0 0 1 1
    Transcription factor TFIIIC, triple barrel domain 0 0 0 1 1
    Transcription factor, Skn7-like 0 0 0 1 1
    • Look into Zinc finger, RING/FYVE/PHD-type
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>% 
      filter(grepl("IPR013083",InterPro),change %in% c("lichen","culture")) %>%
      select(TranscriptID,b,change,Product,InterPro) %>%
    kable(format = "html", col.names = c("TranscriptID","b","change","product","InterPro")) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    TranscriptID b change product InterPro
    XANPAGTX0501_002947-T1 -1.534891 lichen hypothetical protein IPR000330 SNF2, N-terminal;IPR001650 Helicase, C-terminal;IPR001841 Zinc finger, RING-type;IPR013083 Zinc finger, RING/FYVE/PHD-type;IPR014001 Helicase superfamily 1/2, ATP-binding domain;IPR017907 Zinc finger, RING-type, conserved site;IPR018957 Zinc finger, C3HC4 RING-type;IPR027417 P-loop containing nucleoside triphosphate hydrolase;IPR038718 SNF2-like, N-terminal domain superfamily
    XANPAGTX0501_005479-T1 2.092073 culture peroxisome bioproteinsis factor 10 IPR001841 Zinc finger, RING-type;IPR006845 Pex, N-terminal;IPR013083 Zinc finger, RING/FYVE/PHD-type;IPR017907 Zinc finger, RING-type, conserved site
    XANPAGTX0501_006448-T1 -1.199765 lichen hypothetical protein IPR001841 Zinc finger, RING-type;IPR013083 Zinc finger, RING/FYVE/PHD-type
    XANPAGTX0501_009436-T1 -1.317000 lichen hypothetical protein IPR000330 SNF2, N-terminal;IPR001650 Helicase, C-terminal;IPR001841 Zinc finger, RING-type;IPR013083 Zinc finger, RING/FYVE/PHD-type;IPR014001 Helicase superfamily 1/2, ATP-binding domain;IPR027417 P-loop containing nucleoside triphosphate hydrolase;IPR038718 SNF2-like, N-terminal domain superfamily
    • Look into Zn(2)-C6 fungal-type DNA-binding domain superfamily
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>% 
      filter(grepl("IPR036864",InterPro),change %in% c("lichen","culture")) %>%
      select(TranscriptID,b,change,Product,InterPro) %>%
    kable(format = "html", col.names = c("TranscriptID","b","change","product","InterPro")) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    TranscriptID b change product InterPro
    XANPAGTX0501_001980-T1 1.496530 culture hypothetical protein IPR001138 Zn(2)Cys(6) fungal-type DNA-binding domain;IPR036864 Zn(2)-C6 fungal-type DNA-binding domain superfamily
    XANPAGTX0501_002161-T1 2.266902 culture hypothetical protein IPR001138 Zn(2)Cys(6) fungal-type DNA-binding domain;IPR036864 Zn(2)-C6 fungal-type DNA-binding domain superfamily
    XANPAGTX0501_002427-T1 -1.786972 lichen hypothetical protein IPR001138 Zn(2)Cys(6) fungal-type DNA-binding domain;IPR036864 Zn(2)-C6 fungal-type DNA-binding domain superfamily
    XANPAGTX0501_002762-T1 1.105893 culture DNA-binding transcriptional regulator ume6 IPR001138 Zn(2)Cys(6) fungal-type DNA-binding domain;IPR021858 Fungal transcription factor;IPR036864 Zn(2)-C6 fungal-type DNA-binding domain superfamily
    XANPAGTX0501_004715-T1 -1.034993 lichen Transcriptional regulator of nonfermentable carbon utilization IPR001138 Zn(2)Cys(6) fungal-type DNA-binding domain;IPR036864 Zn(2)-C6 fungal-type DNA-binding domain superfamily
    XANPAGTX0501_005120-T1 5.474386 culture hypothetical protein IPR001138 Zn(2)Cys(6) fungal-type DNA-binding domain;IPR036864 Zn(2)-C6 fungal-type DNA-binding domain superfamily
    XANPAGTX0501_005608-T1 2.365288 culture hypothetical protein IPR001138 Zn(2)Cys(6) fungal-type DNA-binding domain;IPR021858 Fungal transcription factor;IPR036864 Zn(2)-C6 fungal-type DNA-binding domain superfamily
    XANPAGTX0501_009300-T1 1.743077 culture hypothetical protein IPR001138 Zn(2)Cys(6) fungal-type DNA-binding domain;IPR021858 Fungal transcription factor;IPR036864 Zn(2)-C6 fungal-type DNA-binding domain superfamily

    5. Preparing visuals

    Big heatmap, grouped by function

    • Since Complexheatmap didn’t allow me to have multile vertical panels united by one top annotation with single clustering, had to make two heatmapes (one split into panels and one whole) and combine manually in AI
    • Draw whole heatmap, which will be used for the top annotation and clustering
    #make color mapping consistent between different heatmaps
    library(circlize)
    library(extrafont)
    col_fun <- colorRamp2(seq(0,8,length.out = 100), viridis(100))
    
    filt_mat<-subset(trans_mat, rownames(trans_mat) %in% annotation$TranscriptID)
    HM_all = Heatmap(filt_mat, show_row_names = F, show_column_names = T, row_title_rot = 0, col=col_fun,top_annotation = ta,heatmap_legend_param = list(title = "Expression: log(TPM)"))
    
    #get the column order
    colord<-column_order(HM_all)
    ## Warning: The heatmap has not been initialized. You might have different results
    ## if you repeatedly execute this function, e.g. when row_km/column_km was
    ## set. It is more suggested to do as `ht = draw(ht); column_order(ht)`.
    colord<-colnames(filt_mat)[colord]
    
    draw(HM_all)
    
    pdf(file="../results/big_heatmap_unseparated.pdf",width=6,height=6)
    draw(HM_all)
    dev.off()
    ## quartz_off_screen 
    ##                 2
    • Draw two halves of the actual heatmaps
    #define function
    draw_heatmap_by_group<-function(gene_group,annotation_table = annotation_temp,matrix = trans_mat, order = colord){
      list<-annotation_table$TranscriptID[annotation_table$Function==gene_group]
      filt_mat<-subset(matrix, rownames(matrix) %in% list)
      HM = Heatmap(filt_mat, show_row_names = F, show_column_names = T, row_title = gene_group, row_title_rot = 0, col=col_fun,  show_heatmap_legend = FALSE,
        cluster_columns = FALSE,column_order = order,
           column_names_gp = gpar(fontsize = 6, font="Arial"),
        row_title_gp = gpar(fontsize = 7, font="Arial"))
      return(HM)
    }
    
    
    #make a list of all gene groups in the right order
    annotation_temp<-annotation %>% left_join(mult_list %>% select(Function, Function_type) %>% unique()) %>% arrange(Function)
    annotation_temp$Function_type<-factor(annotation_temp$Function_type,
      levels=c("Fungal multicellularity", "Expression regulation","Signal transduction"),ordered = T)
    annotation_temp<-annotation_temp %>% arrange(Function_type)
    
    #rename some of the functions to make shorter
    annotation_temp$Function[annotation_temp$Function=="Acetyl-CoA production and metabolism"]<-"Acetyl-CoA production\nand metabolism"
    annotation_temp$Function[annotation_temp$Function=="Cell division, proliferation and growth"]<-"Cell division and growth"
    annotation_temp$Function[annotation_temp$Function=="Cell surface and cell wall proteins"]<-"Cell wall proteins"
    annotation_temp$Function[annotation_temp$Function=="Cell surface and cell wall proteins"]<-"Cell wall proteins"
    
    #apply to all gene groups
    l<-lapply(as.vector(unique(annotation_temp$Function)),draw_heatmap_by_group)
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    #add other gene groups, which aren't in the multicellularity list
    #protein kinases
    kinase_list<-funannot$TranscriptID[grepl("IPR000719",funannot$InterPro_new) & funannot$TranscriptID %in% sig$target_id[sig$change %in% c("lichen","culture")]]
    filt_mat_kinase<-subset(trans_mat, rownames(trans_mat) %in% kinase_list)
    HM_kinase = Heatmap(filt_mat_kinase, show_row_names = F, show_column_names = T, row_title = "Protein Kinases", row_title_rot = 0, col=col_fun, show_heatmap_legend = FALSE,
           cluster_columns = FALSE,column_order = colord,
           column_names_gp = gpar(fontsize = 6, font="Arial"),
           row_title_gp = gpar(fontsize = 7, font="Arial"))
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    #NLRs
    nlr_list<-nlr_sig$target_id[nlr_sig$change %in% c("lichen","culture")]
    filt_mat_nlr<-subset(trans_mat, rownames(trans_mat) %in% nlr_list)
    HM_nlr = Heatmap(filt_mat_nlr, show_row_names = F, show_column_names = T, row_title = "Putative NLRs", row_title_rot = 0, col=col_fun, show_heatmap_legend = FALSE,
           cluster_columns = FALSE,column_order = colord,
           column_names_gp = gpar(fontsize = 6, font="Arial"),
           row_title_gp = gpar(fontsize = 7, font="Arial"))
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    #Other het-incopatibility
    het_list<-het_sig$TranscriptID[het_sig$change %in% c("lichen","culture") & !(het_sig$TranscriptID %in% nlr_list)]
    filt_mat_het<-subset(trans_mat, rownames(trans_mat) %in% het_list)
    HM_het = Heatmap(filt_mat_het, show_row_names = F, show_column_names = T, row_title = "Other self/non-self\nrecognition", row_title_rot = 0, col=col_fun, show_heatmap_legend = FALSE,
           cluster_columns = FALSE,column_order = colord,
           column_names_gp = gpar(fontsize = 6, font="Arial"),
           row_title_gp = gpar(fontsize = 7, font="Arial"))
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    #replace 4 panels for signaling pathways with one 
    signaling_list<-annotation$TranscriptID[annotation$Function %in% c("G-proteins and GPCR", "MAPK signaling pathway","Oxylipins","Velvet")]
    filt_mat_signal<-subset(trans_mat, rownames(trans_mat) %in% signaling_list)
    HM_signal = Heatmap(filt_mat_signal, show_row_names = F, show_column_names = T, row_title = "Signal transduction", row_title_rot = 0, col=col_fun, show_heatmap_legend = FALSE,
           cluster_columns = FALSE,column_order = colord,
           column_names_gp = gpar(fontsize = 6, font="Arial"),
           row_title_gp = gpar(fontsize = 7, font="Arial"))
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    #draw
    ht_list <- l[[4]] %v% l[[5]] %v% l[[7]] %v% l[[6]] %v% l[[3]] %v% l[[8]] %v% 
      l[[20]] %v% l[[14]] %v% l[[1]] %v% l[[11]] %v% l[[2]] %v% l[[15]] %v% l[[16]] %v% l[[19]]  %v% HM_signal %v% HM_kinase %v% HM_nlr %v% HM_het
    
    #draw in two parts
    ht_list1 <- l[[4]] %v% l[[5]] %v% l[[7]] %v% l[[6]] %v% l[[3]] %v% l[[8]] %v% 
      l[[20]] %v% l[[14]] %v% l[[1]] %v% l[[11]] %v% l[[2]] 
    
    ht_list2 <- l[[15]] %v% l[[16]] %v% l[[19]]  %v% HM_signal %v% HM_kinase %v% HM_nlr %v% HM_het
    
    draw(ht_list1)
    • Export the heat map
    pdf(file="../results/big_heatmap_part1.pdf",width=4.25,height=4.5)
    draw(ht_list1, newpage = FALSE)
    dev.off()
    ## quartz_off_screen 
    ##                 2
    pdf(file="../results/big_heatmap_part2.pdf",width=4,height=3.5)
    draw(ht_list2)
    dev.off()
    ## quartz_off_screen 
    ##                 2
    • Parietin gene cluster
    parietin_list<-c("XANPAGTX0501_008850-T1","XANPAGTX0501_008851-T1","XANPAGTX0501_008852-T1","XANPAGTX0501_008853-T1")
    
    filt_mat_parietin<-subset(trans_mat, rownames(trans_mat) %in% parietin_list)
    HM_parietin = Heatmap(filt_mat_parietin, show_row_names = T, show_column_names = T,cluster_rows = FALSE, row_title_rot = 0, col=viridis(100), heatmap_legend_param = list(title = "Expression: log(TPM)"), top_annotation = ta)
    
    pdf(file="../results/parietin_heatmap.pdf",width=10,height=2.5)
    draw(HM_parietin)
    dev.off()
    ## quartz_off_screen 
    ##                 2
    HM_parietin
    • Make new heat map for all DGE secreted proteins
    secreted_list<-secreted_sig$target_id[secreted_sig$change!="low_logFC"]
    filt_mat_secreted<-subset(trans_mat, rownames(trans_mat) %in% secreted_list)
    HM_secreted = Heatmap(filt_mat_secreted, show_row_names = F, show_column_names = T,cluster_rows = T, row_title_rot = 0, col=viridis(100), heatmap_legend_param = list(title = "Expression: log(TPM)"), top_annotation = ta)
    
    pdf(file="../results/secreted_dge_heatmap.pdf",width=10,height=8)
    draw(HM_secreted)
    dev.off()
    ## quartz_off_screen 
    ##                 2
    HM_secreted
    • Make box plot for the putative effecotr XANPAGTX0501_009887-T1
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_009887-T1", 
                   units = "est_counts", 
                   color_by = "condition")

    * Make heat map for the same effector

    filt_mat_eff<-subset(trans_mat, rownames(trans_mat) =="XANPAGTX0501_009887-T1")
    HM_eff = Heatmap(filt_mat_eff, show_row_names = F, show_column_names = T,cluster_rows = T, row_title_rot = 0, col=viridis(100), heatmap_legend_param = list(title = "Expression: log(TPM)"), top_annotation = ta)
    
    pdf(file="../results/XANPAGTX0501_009887-T1_heatmap.pdf",width=10,height=2)
    draw(HM_eff)
    dev.off()
    ## quartz_off_screen 
    ##                 2
    HM_eff
    • Make heatmap for all core biosynthetic genes (regardless of their DGE status)
      • Added manually DGE status in Ai
    antismash_core<-antismash_gene_all %>% filter(Core_gene==T) %>%
      left_join(funannot2,relationship = "many-to-many") %>% 
      left_join(sig, by = c("TranscriptID"="target_id")) 
    
    
    filt_mat_core<-subset(trans_mat, rownames(trans_mat) %in% antismash_core$TranscriptID)
    
    #define row annotation
    ##for one protein, XANPAGTX0501_001694-T1, we had two annotations since it is a core gene in two cluster: Xp_GTX0501_2_Cluster_6 and Xp_GTX0501_2_Cluster_7. here I picked Xp_GTX0501_2_Cluster_7, since Xp_GTX0501_2_Cluster_6 had two core genes
    ra<-data.frame("TranscriptID"=rownames(filt_mat_core)) %>% left_join(antismash_core) %>%
      filter(TranscriptID!="XANPAGTX0501_001694-T1"|Cluster!="Xp_GTX0501_2_Cluster_6") %>%
      select(Description)
    ra_colors = c("fungal-RiPP-like"= brewer.pal(8,"Dark2")[1], "NRP-metallophore"= brewer.pal(8,"Dark2")[4], "NRPS"= brewer.pal(8,"Dark2")[3],
           "NRPS-like"= brewer.pal(8,"Dark2")[5], "NRPS,T1PKS"= brewer.pal(8,"Dark2")[7], "T1PKS"= brewer.pal(8,"Dark2")[6], 
               "T3PKS" = brewer.pal(8,"Dark2")[8], "terpene"= brewer.pal(8,"Dark2")[2])
    ra = rowAnnotation(df = ra,col = list(Description = ra_colors))
    
    HM_core = Heatmap(filt_mat_core, show_row_names = F, show_column_names = T,
                      row_title_rot = 0, col=col_fun, show_heatmap_legend = FALSE,
          cluster_columns = FALSE,column_order = colord,
           column_names_gp = gpar(fontsize = 6, font="Arial"),
           row_title_gp = gpar(fontsize = 7, font="Arial"),
          top_annotation = ta,
          right_annotation = ra)
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    pdf(file="../results/all_core_SM_heatmap.pdf",width=8,height=8)
    draw(HM_core)
    dev.off()
    ## quartz_off_screen 
    ##                 2
    #get order of the rows in the heatmap and match to b values
    rord = row_order(HM_core)
    ## Warning: The heatmap has not been initialized. You might have different results
    ## if you repeatedly execute this function, e.g. when row_km/column_km was
    ## set. It is more suggested to do as `ht = draw(ht); row_order(ht)`.
    rord<-rownames(filt_mat_core)[rord]
    core_dge<-data.frame("TranscriptID"=rord) %>% left_join(antismash_core) %>%
      filter(TranscriptID!="XANPAGTX0501_001694-T1"|Cluster!="Xp_GTX0501_2_Cluster_6") %>%
      select(TranscriptID,b,Description)
    head(core_dge)
    ##             TranscriptID         b      Description
    ## 1 XANPAGTX0501_001972-T1  2.363522             NRPS
    ## 2 XANPAGTX0501_001973-T1  2.358272            T1PKS
    ## 3 XANPAGTX0501_009715-T1  2.760824 fungal-RiPP-like
    ## 4 XANPAGTX0501_009716-T1  2.958263 fungal-RiPP-like
    ## 5 XANPAGTX0501_009866-T1        NA          terpene
    ## 6 XANPAGTX0501_001064-T1 -1.531609 fungal-RiPP-like
    • Make a heat map for putative effectors
      • Include all genes from the ‘cluster of interest’ form the clustering of secretome based on the structures
    cl_int<-read.delim2("../../04_secretome/analysis_and_temp_files/05_cluster_structures/clustering_results.txt") %>% filter(ClusterID %in% c("cl04","cl16","cl18","cl19","cl20","cl21","cl22","cl23",
                          "cl24","cl24a","cl37","cl38","cl40","cl41","cl42","cl80",
                          "cl29","cl34"),Gene_Expression!="Non-DGE")
    filt_mat_int<-subset(trans_mat, rownames(trans_mat) %in% cl_int$ProteinID)
    
    #define row annotation
    ra2<-data.frame("ProteinID"=rownames(filt_mat_int)) %>% left_join(cl_int) %>%
      select(ClusterID)
    
    #define color to match the tree
    col_cl<-read.delim2("../../04_secretome/analysis_and_temp_files/05_cluster_structures/cluster_col.txt",sep=",") %>% filter(cluster_id %in% cl_int$ClusterID)
    ra2_colors<-as.character(col_cl$color)
    names(ra2_colors)<-as.character(col_cl$cluster_id)
    ra2 = rowAnnotation(df = ra2,col = list(ClusterID = ra2_colors),show_legend = F)
    ta2 = HeatmapAnnotation(df=s2c,col = list(condition = ta_colors),show_legend = F)
    
    HM_int = Heatmap(filt_mat_int, show_row_names = T, show_column_names = T,
                      row_title_rot = 0, col=col_fun, show_heatmap_legend = F,
          cluster_columns = FALSE,column_order = colord,
          top_annotation = ta2,
          right_annotation = ra2,
          column_names_gp = gpar(fontsize = 6),
           row_names_gp = gpar(fontsize = 6))
    
    pdf(file="../results/all_secretome_int_heatmap.pdf",width=4,height=4)
    draw(HM_int)
    dev.off()
    ## quartz_off_screen 
    ##                 2
    #get order of rows to match to clusters
    rord = row_order(HM_int)
    ## Warning: The heatmap has not been initialized. You might have different results
    ## if you repeatedly execute this function, e.g. when row_km/column_km was
    ## set. It is more suggested to do as `ht = draw(ht); row_order(ht)`.
    rord<-rownames(filt_mat_int)[rord]
    rord2<-data.frame("ProteinID"=rord) %>% left_join(cl_int)
    
    #get number of DGE per cluster of interest
    t<-read.delim2("../../04_secretome/analysis_and_temp_files/05_cluster_structures/clustering_results.txt") %>% filter(ClusterID %in% c("cl04","cl16","cl18","cl19","cl20","cl21","cl22","cl23",
                          "cl24","cl24a","cl37","cl38","cl40","cl41","cl42","cl80",
                          "cl29","cl34")) 
    
    t%>% group_by(ClusterID,Gene_Expression) %>% summarise(n=n()) %>%
      pivot_wider(names_from = Gene_Expression,values_from = n,values_fill=0)
    ## # A tibble: 18 × 3
    ## # Groups:   ClusterID [18]
    ##    ClusterID `Non-DGE` `Upregulated in lichen`
    ##    <chr>         <int>                   <int>
    ##  1 cl04              6                       2
    ##  2 cl16              2                       0
    ##  3 cl18              2                       4
    ##  4 cl19              3                       1
    ##  5 cl20              7                       0
    ##  6 cl21              4                       1
    ##  7 cl22              0                       2
    ##  8 cl23              2                       1
    ##  9 cl24              5                       4
    ## 10 cl24a             6                       5
    ## 11 cl29              3                       1
    ## 12 cl34              3                       1
    ## 13 cl37              1                       1
    ## 14 cl38              3                       1
    ## 15 cl40              1                       1
    ## 16 cl41              0                       3
    ## 17 cl42              0                       1
    ## 18 cl80              1                       1

    Check for chromosomal clusters in DGE

    write.table(geneList5,'../analysis_and_temp_files/08_dge_culture_lichen/lichen_upregulated.txt',col.names = F,row.names = F,quote = F)
    write.table(geneList6,'../analysis_and_temp_files/08_dge_culture_lichen/culture_upregulated.txt',col.names = F,row.names = F,quote = F)

    perl croc.pl –reg ~/Documents/Lichen_project_TSL/03_transcriptomic_analysis/analysis_and_temp_files/08_dge_culture_lichen/culture_upregulated.txt –ref ~/Documents/Lichen_project_TSL/03_transcriptomic_analysis/analysis_and_temp_files/09_dge_architecture/reference.txt

    * In lichen-upregulated genes, got 92 clusters. 12 of them have only 3 genes, and 83 <10 genes. Maximum count is 19
    
    ```r
    croc_l<-read.delim("../analysis_and_temp_files/08_dge_culture_lichen/croc_lichen_upregulated.txt",header=F)
    croc_l$cluster<-paste0("cluster",1:nrow(croc_l))
    croc_l<-croc_l %>% separate_longer_delim(V9, delim = " ") %>% filter(V9!="") %>% 
      select(-c(V7,V8,V2,V3))
    colnames(croc_l)<-c("contig","start","end","pval","TranscriptID","cluster")
    croc_l %>% group_by(cluster) %>% summarize(n=n()) %>% arrange(desc(n)) %>% head()
    ## # A tibble: 6 × 2
    ##   cluster       n
    ##   <chr>     <int>
    ## 1 cluster11    19
    ## 2 cluster9     17
    ## 3 cluster52    16
    ## 4 cluster42    14
    ## 5 cluster92    14
    ## 6 cluster34    12
    croc_l2<-croc_l %>% left_join(funannot2) %>% 
      select(cluster,TranscriptID, InterPro_new,secreted_consensus,antiSMASH) 
    ## Joining with `by = join_by(TranscriptID)`
    croc_l2 %>% 
      kable(format = "html", col.names = colnames(croc_l2)) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "300px")
    cluster TranscriptID InterPro_new secreted_consensus antiSMASH
    cluster1 XANPAGTX0501_000227-T1 IPR001251 CRAL-TRIO lipid binding domain, IPR011074 CRAL/TRIO, N-terminal domain, IPR036273 CRAL/TRIO, N-terminal domain superfamily, IPR036865 CRAL-TRIO lipid binding domain superfamily FALSE NA
    cluster1 XANPAGTX0501_000228-T1 IPR008949 Isoprenoid synthase domain superfamily FALSE NA
    cluster1 XANPAGTX0501_000229-T1 NA FALSE NA
    cluster1 XANPAGTX0501_000230-T1 IPR000092 Polyprenyl synthetase, IPR008949 Isoprenoid synthase domain superfamily, IPR033749 Polyprenyl synthetase, conserved site FALSE NA
    cluster2 XANPAGTX0501_000414-T1 NA FALSE NA
    cluster2 XANPAGTX0501_000413-T1 NA FALSE NA
    cluster2 XANPAGTX0501_000420-T1 IPR013087 Zinc finger C2H2-type FALSE NA
    cluster2 XANPAGTX0501_000417-T1 NA FALSE NA
    cluster3 XANPAGTX0501_000443-T1 IPR001680 WD40 repeat, IPR007111 NACHT nucleoside triphosphatase, IPR007751 Domain of unknown function DUF676, lipase-like, IPR011044 Quinoprotein amine dehydrogenase, beta chain-like, IPR011047 Quinoprotein alcohol dehydrogenase-like superfamily, IPR012908 GPI inositol-deacylase PGAP1-like, IPR015943 WD40/YVTN repeat-like-containing domain superfamily, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR029058 Alpha/Beta hydrolase fold, IPR036322 WD40-repeat-containing domain superfamily FALSE NA
    cluster3 XANPAGTX0501_000443-T2 IPR001680 WD40 repeat, IPR007111 NACHT nucleoside triphosphatase, IPR007751 Domain of unknown function DUF676, lipase-like, IPR011044 Quinoprotein amine dehydrogenase, beta chain-like, IPR011047 Quinoprotein alcohol dehydrogenase-like superfamily, IPR012908 GPI inositol-deacylase PGAP1-like, IPR015943 WD40/YVTN repeat-like-containing domain superfamily, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR029058 Alpha/Beta hydrolase fold, IPR036322 WD40-repeat-containing domain superfamily FALSE NA
    cluster3 XANPAGTX0501_000447-T1 NA FALSE NA
    cluster3 XANPAGTX0501_000448-T1 NA FALSE NA
    cluster4 XANPAGTX0501_000576-T1 IPR006162 Phosphopantetheine attachment site, IPR013968 Polyketide synthase, ketoreductase domain, IPR020806 Polyketide synthase, phosphopantetheine-binding domain, IPR036291 NAD(P)-binding domain superfamily, IPR036736 ACP-like superfamily FALSE NA
    cluster4 XANPAGTX0501_000577-T1 IPR011032 GroES-like superfamily, IPR013217 Methyltransferase type 12, IPR013968 Polyketide synthase, ketoreductase domain, IPR020807 Polyketide synthase, dehydratase domain, IPR020843 Polyketide synthase, enoylreductase domain, IPR029063 S-adenosyl-L-methionine-dependent methyltransferase superfamily, IPR036291 NAD(P)-binding domain superfamily, IPR042104 Polyketide synthase, dehydratase domain superfamily FALSE NA
    cluster4 XANPAGTX0501_000578-T1 IPR001227 Acyl transferase domain superfamily, IPR014030 Beta-ketoacyl synthase, N-terminal, IPR014031 Beta-ketoacyl synthase, C-terminal, IPR014043 Acyl transferase, IPR016035 Acyl transferase/acyl hydrolase/lysophospholipase, IPR016036 Malonyl-CoA ACP transacylase, ACP-binding, IPR016039 Thiolase-like, IPR020801 None, IPR020807 Polyketide synthase, dehydratase domain, IPR020841 Polyketide synthase, beta-ketoacyl synthase domain, IPR032821 Polyketide synthase, C-terminal extension, IPR042104 Polyketide synthase, dehydratase domain superfamily FALSE NA
    cluster4 XANPAGTX0501_000579-T1 IPR001227 Acyl transferase domain superfamily, IPR014030 Beta-ketoacyl synthase, N-terminal, IPR014031 Beta-ketoacyl synthase, C-terminal, IPR014043 Acyl transferase, IPR016035 Acyl transferase/acyl hydrolase/lysophospholipase, IPR016039 Thiolase-like, IPR018201 Beta-ketoacyl synthase, active site, IPR020801 None, IPR020841 Polyketide synthase, beta-ketoacyl synthase domain, IPR032821 Polyketide synthase, C-terminal extension FALSE NA
    cluster5 XANPAGTX0501_000601-T1 NA FALSE NA
    cluster5 XANPAGTX0501_000606-T1 NA TRUE NA
    cluster5 XANPAGTX0501_000602-T1 NA FALSE NA
    cluster5 XANPAGTX0501_000610-T1 NA FALSE NA
    cluster5 XANPAGTX0501_000604-T1 NA FALSE NA
    cluster5 XANPAGTX0501_000605-T1 NA FALSE NA
    cluster5 XANPAGTX0501_000609-T1 NA TRUE NA
    cluster5 XANPAGTX0501_000603-T1 IPR006076 FAD dependent oxidoreductase, IPR036188 FAD/NAD(P)-binding domain superfamily FALSE NA
    cluster6 XANPAGTX0501_000650-T1 NA FALSE NA
    cluster6 XANPAGTX0501_000655-T1 IPR035992 Ricin B-like lectins TRUE NA
    cluster6 XANPAGTX0501_000649-T1 NA FALSE NA
    cluster6 XANPAGTX0501_000656-T1 IPR000772 Ricin B, lectin domain, IPR035992 Ricin B-like lectins TRUE NA
    cluster6 XANPAGTX0501_000647-T1 IPR000571 Zinc finger, CCCH-type FALSE NA
    cluster6 XANPAGTX0501_000648-T1 IPR022099 Domain of unknown function DUF3638, IPR022105 Domain of unknown function DUF3645, IPR027417 P-loop containing nucleoside triphosphate hydrolase FALSE NA
    cluster6 XANPAGTX0501_000654-T1 IPR004843 Calcineurin-like phosphoesterase domain, ApaH type, IPR029052 Metallo-dependent phosphatase-like FALSE NA
    cluster7 XANPAGTX0501_000758-T1 IPR000757 Glycoside hydrolase family 16, IPR013320 Concanavalin A-like lectin/glucanase domain superfamily FALSE NA
    cluster7 XANPAGTX0501_000751-T1 NA FALSE NA
    cluster7 XANPAGTX0501_000757-T1 IPR001173 Glycosyltransferase 2-like, IPR029044 Nucleotide-diphospho-sugar transferases FALSE NA
    cluster7 XANPAGTX0501_000755-T1 NA FALSE NA
    cluster8 XANPAGTX0501_000872-T1 NA FALSE NA
    cluster8 XANPAGTX0501_000873-T1 IPR002347 Short-chain dehydrogenase/reductase SDR, IPR020904 Short-chain dehydrogenase/reductase, conserved site, IPR036291 NAD(P)-binding domain superfamily FALSE NA
    cluster8 XANPAGTX0501_000874-T1 IPR001525 C-5 cytosine methyltransferase, IPR018117 DNA methylase, C-5 cytosine-specific, active site, IPR029063 S-adenosyl-L-methionine-dependent methyltransferase superfamily, IPR031303 DNA methylase, C-5 cytosine-specific, conserved site FALSE NA
    cluster8 XANPAGTX0501_000879-T1 IPR005198 Glycoside hydrolase, family 76, IPR008928 Six-hairpin glycosidase superfamily, IPR014480 Mannan endo-1,6-alpha-mannosidase FALSE NA
    cluster9 XANPAGTX0501_000922-T1 NA FALSE Cluster_2
    cluster9 XANPAGTX0501_000911-T1 IPR002110 Ankyrin repeat, IPR020683 Domain of unknown function DUF3447, IPR036770 Ankyrin repeat-containing domain superfamily FALSE NA
    cluster9 XANPAGTX0501_000916-T1 NA FALSE Cluster_2
    cluster9 XANPAGTX0501_000914-T1 NA FALSE Cluster_2
    cluster9 XANPAGTX0501_000888-T1 NA FALSE NA
    cluster9 XANPAGTX0501_000909-T1 NA FALSE NA
    cluster9 XANPAGTX0501_000899-T1 NA FALSE NA
    cluster9 XANPAGTX0501_000891-T1 NA FALSE NA
    cluster9 XANPAGTX0501_000910-T1 NA FALSE NA
    cluster9 XANPAGTX0501_000892-T1 NA FALSE NA
    cluster9 XANPAGTX0501_000894-T1 IPR027417 P-loop containing nucleoside triphosphate hydrolase FALSE NA
    cluster9 XANPAGTX0501_000917-T1 NA FALSE Cluster_2
    cluster9 XANPAGTX0501_000915-T1 NA FALSE Cluster_2
    cluster9 XANPAGTX0501_000897-T1 IPR029063 S-adenosyl-L-methionine-dependent methyltransferase superfamily FALSE NA
    cluster9 XANPAGTX0501_000923-T1 IPR008963 Purple acid phosphatase-like, N-terminal, IPR018946 PhoD-like phosphatase, metallophosphatase domain, IPR032093 Phospholipase D, N-terminal, IPR038607 PhoD-like superfamily TRUE Cluster_2
    cluster9 XANPAGTX0501_000908-T1 IPR001138 Zn(2)Cys(6) fungal-type DNA-binding domain, IPR011008 Dimeric alpha-beta barrel, IPR025444 Monooxygenase af470-like, IPR036864 Zn(2)-C6 fungal-type DNA-binding domain superfamily FALSE NA
    cluster9 XANPAGTX0501_000913-T1 NA FALSE Cluster_2
    cluster10 XANPAGTX0501_000949-T1 NA FALSE NA
    cluster10 XANPAGTX0501_000948-T1 NA FALSE NA
    cluster10 XANPAGTX0501_000952-T1 NA FALSE NA
    cluster10 XANPAGTX0501_000951-T1 NA FALSE NA
    cluster11 XANPAGTX0501_006311-T1 NA FALSE Cluster_2
    cluster11 XANPAGTX0501_006293-T1 IPR001958 Tetracycline resistance protein TetA/multidrug resistance protein MdtG, IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE Cluster_1
    cluster11 XANPAGTX0501_006324-T1 IPR001204 Phosphate transporter FALSE Cluster_2
    cluster11 XANPAGTX0501_006295-T1 IPR029063 S-adenosyl-L-methionine-dependent methyltransferase superfamily FALSE Cluster_1
    cluster11 XANPAGTX0501_006314-T1 IPR001646 Pentapeptide repeat FALSE Cluster_2
    cluster11 XANPAGTX0501_006301-T1 IPR001585 Transaldolase/Fructose-6-phosphate aldolase, IPR013785 Aldolase-type TIM barrel, IPR018225 Transaldolase, active site FALSE Cluster_1
    cluster11 XANPAGTX0501_006302-T1 IPR036291 NAD(P)-binding domain superfamily FALSE Cluster_1
    cluster11 XANPAGTX0501_006298-T1 IPR005123 Oxoglutarate/iron-dependent dioxygenase, IPR026992 Non-haem dioxygenase N-terminal domain, IPR027443 Isopenicillin N synthase-like superfamily FALSE Cluster_1
    cluster11 XANPAGTX0501_006320-T1 NA FALSE Cluster_2
    cluster11 XANPAGTX0501_006319-T1 IPR001031 Thioesterase, IPR001227 Acyl transferase domain superfamily, IPR006162 Phosphopantetheine attachment site, IPR009081 Phosphopantetheine binding ACP domain, IPR014030 Beta-ketoacyl synthase, N-terminal, IPR014031 Beta-ketoacyl synthase, C-terminal, IPR014043 Acyl transferase, IPR016035 Acyl transferase/acyl hydrolase/lysophospholipase, IPR016036 Malonyl-CoA ACP transacylase, ACP-binding, IPR016039 Thiolase-like, IPR018201 Beta-ketoacyl synthase, active site, IPR020801 None, IPR020806 Polyketide synthase, phosphopantetheine-binding domain, IPR020807 Polyketide synthase, dehydratase domain, IPR020841 Polyketide synthase, beta-ketoacyl synthase domain, IPR029058 Alpha/Beta hydrolase fold, IPR030918 Polyketide product template domain, IPR032088 Starter unit:ACP transacylase, IPR036736 ACP-like superfamily, IPR042104 Polyketide synthase, dehydratase domain superfamily FALSE Cluster_2
    cluster11 XANPAGTX0501_006304-T1 NA FALSE Cluster_1
    cluster11 XANPAGTX0501_006317-T1 IPR036388 Winged helix-like DNA-binding domain superfamily, IPR036390 Winged helix DNA-binding domain superfamily FALSE Cluster_2
    cluster11 XANPAGTX0501_006318-T1 IPR009799 EthD domain, IPR011008 Dimeric alpha-beta barrel FALSE Cluster_2
    cluster11 XANPAGTX0501_006303-T1 NA FALSE Cluster_1
    cluster11 XANPAGTX0501_006294-T1 NA FALSE Cluster_1
    cluster11 XANPAGTX0501_006308-T1 NA FALSE Cluster_1
    cluster11 XANPAGTX0501_006296-T1 IPR001173 Glycosyltransferase 2-like, IPR029044 Nucleotide-diphospho-sugar transferases FALSE Cluster_1
    cluster11 XANPAGTX0501_006291-T1 NA TRUE Cluster_1
    cluster11 XANPAGTX0501_006292-T1 NA TRUE Cluster_1
    cluster12 XANPAGTX0501_006907-T1 IPR041622 SMODS and SLOG-associating 2TM effector domain, fungi FALSE NA
    cluster12 XANPAGTX0501_006918-T1 IPR010255 Haem peroxidase superfamily, IPR017972 Cytochrome P450, conserved site, IPR019791 Haem peroxidase, animal-type, IPR034812 Psi-factor producing oxygenase, N-terminal heme peroxidase domain, IPR036396 Cytochrome P450 superfamily FALSE NA
    cluster12 XANPAGTX0501_006903-T1 NA FALSE NA
    cluster12 XANPAGTX0501_006917-T1 IPR018712 Domain of unknown function DUF2235 FALSE NA
    cluster12 XANPAGTX0501_006919-T1 NA FALSE NA
    cluster12 XANPAGTX0501_006904-T1 NA FALSE NA
    cluster12 XANPAGTX0501_006910-T1 NA FALSE NA
    cluster12 XANPAGTX0501_006906-T1 IPR024498 Domain of unknown function DUF2786 FALSE NA
    cluster12 XANPAGTX0501_006909-T1 NA FALSE NA
    cluster12 XANPAGTX0501_006921-T1 NA FALSE NA
    cluster12 XANPAGTX0501_006922-T1 IPR004313 Acireductone dioxygenase ARD family, IPR011051 RmlC-like cupin domain superfamily, IPR014710 RmlC-like jelly roll fold, IPR027496 Acireductone dioxygenase, eukaryotes FALSE NA
    cluster13 XANPAGTX0501_006951-T1 NA FALSE NA
    cluster13 XANPAGTX0501_006950-T1 NA FALSE NA
    cluster13 XANPAGTX0501_006945-T1 NA FALSE NA
    cluster13 XANPAGTX0501_006946-T1 NA FALSE NA
    cluster13 XANPAGTX0501_006957-T1 NA FALSE NA
    cluster13 XANPAGTX0501_006953-T1 NA FALSE NA
    cluster14 XANPAGTX0501_006970-T1 NA FALSE NA
    cluster14 XANPAGTX0501_006972-T1 NA FALSE NA
    cluster14 XANPAGTX0501_006967-T1 NA TRUE NA
    cluster14 XANPAGTX0501_006971-T1 NA FALSE NA
    cluster14 XANPAGTX0501_006974-T1 NA FALSE NA
    cluster15 XANPAGTX0501_007248-T1 IPR019313 Mediator complex, subunit Med17 FALSE NA
    cluster15 XANPAGTX0501_007251-T1 IPR007111 NACHT nucleoside triphosphatase, IPR027417 P-loop containing nucleoside triphosphate hydrolase FALSE NA
    cluster15 XANPAGTX0501_007247-T1 IPR000023 Phosphofructokinase domain, IPR012003 ATP-dependent 6-phosphofructokinase, prokaryotic-type, IPR015912 Phosphofructokinase, conserved site, IPR022953 ATP-dependent 6-phosphofructokinase, IPR035966 Phosphofructokinase superfamily FALSE NA
    cluster15 XANPAGTX0501_007245-T1 NA FALSE NA
    cluster15 XANPAGTX0501_007244-T1 NA FALSE NA
    cluster15 XANPAGTX0501_007246-T1 IPR000023 Phosphofructokinase domain, IPR015912 Phosphofructokinase, conserved site, IPR022953 ATP-dependent 6-phosphofructokinase, IPR035966 Phosphofructokinase superfamily FALSE NA
    cluster15 XANPAGTX0501_007242-T1 NA FALSE NA
    cluster15 XANPAGTX0501_007253-T1 NA FALSE NA
    cluster16 XANPAGTX0501_007271-T1 IPR002347 Short-chain dehydrogenase/reductase SDR, IPR036291 NAD(P)-binding domain superfamily FALSE Cluster_1
    cluster16 XANPAGTX0501_007267-T1 IPR001077 O-methyltransferase domain, IPR016461 O-methyltransferase COMT-type, IPR029063 S-adenosyl-L-methionine-dependent methyltransferase superfamily, IPR036388 Winged helix-like DNA-binding domain superfamily, IPR036390 Winged helix DNA-binding domain superfamily FALSE Cluster_1
    cluster16 XANPAGTX0501_007272-T1 NA TRUE Cluster_1
    cluster16 XANPAGTX0501_007268-T1 IPR006094 FAD linked oxidase, N-terminal, IPR012951 Berberine/berberine-like, IPR016166 FAD-binding domain, PCMH-type, IPR016167 FAD-binding, type PCMH, subdomain 1, IPR016169 FAD-binding, type PCMH, subdomain 2, IPR036318 FAD-binding, type PCMH-like superfamily TRUE Cluster_1
    cluster17 XANPAGTX0501_007313-T1 NA FALSE NA
    cluster17 XANPAGTX0501_007308-T1 NA FALSE NA
    cluster17 XANPAGTX0501_007303-T1 IPR018712 Domain of unknown function DUF2235, IPR029058 Alpha/Beta hydrolase fold, IPR036404 Jacalin-like lectin domain superfamily FALSE NA
    cluster17 XANPAGTX0501_007310-T1 IPR002151 Kinesin light chain, IPR011990 Tetratricopeptide-like helical domain superfamily, IPR013026 None, IPR013087 Zinc finger C2H2-type, IPR019734 Tetratricopeptide repeat FALSE NA
    cluster17 XANPAGTX0501_007312-T1 NA FALSE NA
    cluster17 XANPAGTX0501_007311-T1 IPR002182 NB-ARC, IPR010730 Heterokaryon incompatibility, IPR011990 Tetratricopeptide-like helical domain superfamily, IPR013026 None, IPR019734 Tetratricopeptide repeat, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR031352 NACHT-NTPase and P-loop NTPases, N-terminal domain, IPR041664 Orc1-like, AAA ATPase domain FALSE NA
    cluster17 XANPAGTX0501_007309-T1 IPR040976 Fungal-type protein kinase FALSE NA
    cluster18 XANPAGTX0501_007344-T1 NA FALSE Cluster_2
    cluster18 XANPAGTX0501_007361-T1 NA FALSE Cluster_2
    cluster18 XANPAGTX0501_007362-T1 NA TRUE Cluster_2
    cluster18 XANPAGTX0501_007353-T1 NA FALSE Cluster_2
    cluster18 XANPAGTX0501_007349-T1 NA FALSE Cluster_2
    cluster18 XANPAGTX0501_007351-T1 IPR000873 AMP-dependent synthetase/ligase domain, IPR001242 Condensation domain, IPR009081 Phosphopantetheine binding ACP domain, IPR020845 AMP-binding, conserved site, IPR023213 Chloramphenicol acetyltransferase-like domain superfamily, IPR025110 AMP-binding enzyme, C-terminal domain, IPR036736 ACP-like superfamily, IPR042099 ANL, N-terminal domain FALSE Cluster_2
    cluster18 XANPAGTX0501_007352-T1 IPR000873 AMP-dependent synthetase/ligase domain, IPR001242 Condensation domain, IPR009081 Phosphopantetheine binding ACP domain, IPR020845 AMP-binding, conserved site, IPR023213 Chloramphenicol acetyltransferase-like domain superfamily, IPR025110 AMP-binding enzyme, C-terminal domain, IPR036736 ACP-like superfamily, IPR042099 ANL, N-terminal domain FALSE Cluster_2
    cluster18 XANPAGTX0501_007350-T1 IPR020845 AMP-binding, conserved site, IPR042099 ANL, N-terminal domain FALSE Cluster_2
    cluster18 XANPAGTX0501_007343-T1 NA FALSE Cluster_2
    cluster19 XANPAGTX0501_007397-T1 NA FALSE Cluster_3
    cluster19 XANPAGTX0501_007403-T1 NA FALSE NA
    cluster19 XANPAGTX0501_007404-T1 IPR001002 Chitin-binding, type 1, IPR001223 Glycoside hydrolase family 18, catalytic domain, IPR011583 Chitinase II, IPR017853 Glycoside hydrolase superfamily, IPR036861 Endochitinase-like superfamily FALSE NA
    cluster19 XANPAGTX0501_007405-T1 IPR001002 Chitin-binding, type 1, IPR002509 NodB homology domain, IPR011330 Glycoside hydrolase/deacetylase, beta/alpha-barrel, IPR017853 Glycoside hydrolase superfamily, IPR018371 Chitin-binding, type 1, conserved site, IPR029070 Chitinase insertion domain superfamily, IPR036852 Peptidase S8/S53 domain superfamily, IPR036861 Endochitinase-like superfamily FALSE NA
    cluster20 XANPAGTX0501_007872-T1 NA FALSE Cluster_1
    cluster20 XANPAGTX0501_007870-T1 NA FALSE Cluster_1
    cluster20 XANPAGTX0501_007865-T1 NA TRUE NA
    cluster20 XANPAGTX0501_007863-T1 NA FALSE NA
    cluster20 XANPAGTX0501_007866-T1 NA FALSE NA
    cluster20 XANPAGTX0501_007862-T1 NA FALSE NA
    cluster20 XANPAGTX0501_007864-T1 NA FALSE NA
    cluster20 XANPAGTX0501_007873-T1 NA FALSE Cluster_1
    cluster20 XANPAGTX0501_007869-T1 IPR000210 BTB/POZ domain, IPR011333 SKP1/BTB/POZ domain superfamily FALSE Cluster_1
    cluster21 XANPAGTX0501_007923-T1 IPR000608 Ubiquitin-conjugating enzyme E2, IPR016135 Ubiquitin-conjugating enzyme/RWD-like FALSE NA
    cluster21 XANPAGTX0501_007933-T1 NA FALSE NA
    cluster21 XANPAGTX0501_007932-T1 NA FALSE NA
    cluster21 XANPAGTX0501_007929-T1 NA FALSE NA
    cluster21 XANPAGTX0501_007931-T1 NA FALSE NA
    cluster21 XANPAGTX0501_007934-T1 NA FALSE NA
    cluster21 XANPAGTX0501_007930-T1 NA FALSE NA
    cluster21 XANPAGTX0501_007939-T1 NA TRUE NA
    cluster22 XANPAGTX0501_007969-T1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE NA
    cluster22 XANPAGTX0501_007973-T1 NA FALSE NA
    cluster22 XANPAGTX0501_007970-T1 IPR001077 O-methyltransferase domain, IPR016461 O-methyltransferase COMT-type, IPR029063 S-adenosyl-L-methionine-dependent methyltransferase superfamily, IPR036388 Winged helix-like DNA-binding domain superfamily, IPR036390 Winged helix DNA-binding domain superfamily FALSE NA
    cluster22 XANPAGTX0501_007972-T1 NA FALSE NA
    cluster22 XANPAGTX0501_007971-T1 NA FALSE NA
    cluster22 XANPAGTX0501_007976-T1 NA TRUE NA
    cluster23 XANPAGTX0501_008007-T1 NA FALSE NA
    cluster23 XANPAGTX0501_008008-T1 NA FALSE NA
    cluster23 XANPAGTX0501_008009-T1 NA FALSE NA
    cluster24 XANPAGTX0501_008019-T1 NA TRUE NA
    cluster24 XANPAGTX0501_008029-T1 NA FALSE NA
    cluster24 XANPAGTX0501_008031-T1 NA FALSE NA
    cluster24 XANPAGTX0501_008032-T1 NA FALSE NA
    cluster24 XANPAGTX0501_008024-T2 IPR007192 Cdc23, IPR011990 Tetratricopeptide-like helical domain superfamily, IPR013026 None, IPR019734 Tetratricopeptide repeat FALSE NA
    cluster24 XANPAGTX0501_008030-T1 IPR001128 Cytochrome P450, IPR002401 Cytochrome P450, E-class, group I, IPR017972 Cytochrome P450, conserved site, IPR036396 Cytochrome P450 superfamily FALSE NA
    cluster24 XANPAGTX0501_008041-T1 NA TRUE NA
    cluster24 XANPAGTX0501_008027-T1 IPR000028 Chloroperoxidase, IPR036851 Chloroperoxidase-like superfamily TRUE NA
    cluster24 XANPAGTX0501_008018-T1 NA FALSE NA
    cluster24 XANPAGTX0501_008028-T1 NA FALSE NA
    cluster25 XANPAGTX0501_008272-T1 IPR000873 AMP-dependent synthetase/ligase domain, IPR006162 Phosphopantetheine attachment site, IPR009081 Phosphopantetheine binding ACP domain, IPR013120 Fatty acyl-coenzyme A reductase, NAD-binding domain, IPR020845 AMP-binding, conserved site, IPR036291 NAD(P)-binding domain superfamily, IPR036736 ACP-like superfamily, IPR042099 ANL, N-terminal domain FALSE Cluster_1
    cluster25 XANPAGTX0501_008267-T1 IPR024079 Metallopeptidase, catalytic domain superfamily FALSE Cluster_1
    cluster25 XANPAGTX0501_008268-T1 NA FALSE Cluster_1
    cluster25 XANPAGTX0501_008274-T1 NA FALSE Cluster_1
    cluster25 XANPAGTX0501_008270-T1 NA TRUE Cluster_1
    cluster25 XANPAGTX0501_008273-T1 IPR000873 AMP-dependent synthetase/ligase domain, IPR006162 Phosphopantetheine attachment site, IPR009081 Phosphopantetheine binding ACP domain, IPR013120 Fatty acyl-coenzyme A reductase, NAD-binding domain, IPR020845 AMP-binding, conserved site, IPR036291 NAD(P)-binding domain superfamily, IPR036736 ACP-like superfamily, IPR042099 ANL, N-terminal domain FALSE Cluster_1
    cluster26 XANPAGTX0501_008284-T1 IPR029044 Nucleotide-diphospho-sugar transferases FALSE Cluster_1
    cluster26 XANPAGTX0501_008288-T1 NA FALSE NA
    cluster26 XANPAGTX0501_008289-T1 NA FALSE NA
    cluster26 XANPAGTX0501_008291-T1 NA FALSE NA
    cluster27 XANPAGTX0501_008409-T1 IPR001680 WD40 repeat, IPR011659 WD40-like beta propeller, IPR011990 Tetratricopeptide-like helical domain superfamily, IPR013026 None, IPR015943 WD40/YVTN repeat-like-containing domain superfamily, IPR017986 None, IPR019734 Tetratricopeptide repeat, IPR019775 WD40 repeat, conserved site, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR036322 WD40-repeat-containing domain superfamily FALSE NA
    cluster27 XANPAGTX0501_008413-T1 NA FALSE NA
    cluster27 XANPAGTX0501_008414-T1 NA FALSE NA
    cluster27 XANPAGTX0501_008417-T1 IPR002347 Short-chain dehydrogenase/reductase SDR, IPR020904 Short-chain dehydrogenase/reductase, conserved site, IPR036291 NAD(P)-binding domain superfamily FALSE NA
    cluster27 XANPAGTX0501_008418-T1 IPR023213 Chloramphenicol acetyltransferase-like domain superfamily FALSE NA
    cluster28 XANPAGTX0501_008435-T1 IPR000073 Alpha/beta hydrolase fold-1, IPR013595 Peptidase S33 tripeptidyl aminopeptidase-like, C-terminal, IPR029058 Alpha/Beta hydrolase fold FALSE NA
    cluster28 XANPAGTX0501_008438-T1 NA FALSE NA
    cluster28 XANPAGTX0501_008437-T1 IPR042099 ANL, N-terminal domain FALSE NA
    cluster28 XANPAGTX0501_008439-T1 IPR003819 TauD/TfdA-like domain, IPR042098 Taurine dioxygenase TauD-like superfamily FALSE NA
    cluster28 XANPAGTX0501_008436-T1 NA FALSE NA
    cluster29 XANPAGTX0501_008541-T1 NA FALSE NA
    cluster29 XANPAGTX0501_008525-T1 NA FALSE NA
    cluster29 XANPAGTX0501_008533-T1 NA FALSE NA
    cluster29 XANPAGTX0501_008529-T1 NA FALSE NA
    cluster29 XANPAGTX0501_008534-T1 NA FALSE NA
    cluster30 XANPAGTX0501_008676-T1 NA FALSE NA
    cluster30 XANPAGTX0501_008677-T1 IPR027417 P-loop containing nucleoside triphosphate hydrolase FALSE NA
    cluster30 XANPAGTX0501_008683-T1 NA TRUE NA
    cluster31 XANPAGTX0501_008863-T1 NA FALSE Cluster_1
    cluster31 XANPAGTX0501_008852-T1 IPR001227 Acyl transferase domain superfamily, IPR009081 Phosphopantetheine binding ACP domain, IPR014030 Beta-ketoacyl synthase, N-terminal, IPR014031 Beta-ketoacyl synthase, C-terminal, IPR014043 Acyl transferase, IPR016035 Acyl transferase/acyl hydrolase/lysophospholipase, IPR016036 Malonyl-CoA ACP transacylase, ACP-binding, IPR016039 Thiolase-like, IPR020801 None, IPR020806 Polyketide synthase, phosphopantetheine-binding domain, IPR020841 Polyketide synthase, beta-ketoacyl synthase domain, IPR030918 Polyketide product template domain, IPR032088 Starter unit:ACP transacylase, IPR032821 Polyketide synthase, C-terminal extension, IPR036736 ACP-like superfamily, IPR042104 Polyketide synthase, dehydratase domain superfamily FALSE Cluster_1
    cluster31 XANPAGTX0501_008851-T1 IPR001279 Metallo-beta-lactamase, IPR036866 Ribonuclease Z/Hydroxyacylglutathione hydrolase-like FALSE Cluster_1
    cluster31 XANPAGTX0501_008856-T1 IPR001810 F-box domain, IPR032675 Leucine-rich repeat domain superfamily FALSE Cluster_1
    cluster31 XANPAGTX0501_008850-T1 IPR003439 ABC transporter-like, ATP-binding domain, IPR003593 AAA+ ATPase domain, IPR010929 CDR ABC transporter, IPR013525 ABC-2 type transporter, IPR017871 ABC transporter-like, conserved site, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR029481 ABC-transporter, N-terminal domain, IPR034001 ATP-binding cassette transporter, PDR-like subfamily G, domain 1, IPR034003 ATP-binding cassette transporter, PDR-like subfamily G, domain 2 FALSE Cluster_1
    cluster31 XANPAGTX0501_008853-T1 IPR009799 EthD domain, IPR011008 Dimeric alpha-beta barrel FALSE Cluster_1
    cluster31 XANPAGTX0501_008855-T1 IPR000210 BTB/POZ domain, IPR011333 SKP1/BTB/POZ domain superfamily FALSE Cluster_1
    cluster32 XANPAGTX0501_008875-T1 NA TRUE NA
    cluster32 XANPAGTX0501_008876-T1 NA FALSE NA
    cluster32 XANPAGTX0501_008877-T1 NA FALSE NA
    cluster33 XANPAGTX0501_009046-T1 NA FALSE NA
    cluster33 XANPAGTX0501_009041-T1 IPR002347 Short-chain dehydrogenase/reductase SDR, IPR036291 NAD(P)-binding domain superfamily FALSE NA
    cluster33 XANPAGTX0501_009040-T1 NA FALSE NA
    cluster33 XANPAGTX0501_009055-T1 IPR010730 Heterokaryon incompatibility FALSE NA
    cluster33 XANPAGTX0501_009052-T1 NA FALSE NA
    cluster33 XANPAGTX0501_009054-T1 NA FALSE NA
    cluster33 XANPAGTX0501_009050-T1 NA FALSE NA
    cluster34 XANPAGTX0501_009194-T1 IPR001077 O-methyltransferase domain, IPR016461 O-methyltransferase COMT-type, IPR029063 S-adenosyl-L-methionine-dependent methyltransferase superfamily, IPR036388 Winged helix-like DNA-binding domain superfamily, IPR036390 Winged helix DNA-binding domain superfamily FALSE NA
    cluster34 XANPAGTX0501_009204-T1 IPR001077 O-methyltransferase domain, IPR016461 O-methyltransferase COMT-type, IPR029063 S-adenosyl-L-methionine-dependent methyltransferase superfamily, IPR036388 Winged helix-like DNA-binding domain superfamily, IPR036390 Winged helix DNA-binding domain superfamily FALSE NA
    cluster34 XANPAGTX0501_009201-T1 NA FALSE NA
    cluster34 XANPAGTX0501_009192-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE NA
    cluster34 XANPAGTX0501_009196-T1 NA FALSE NA
    cluster34 XANPAGTX0501_009202-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE NA
    cluster34 XANPAGTX0501_009199-T1 NA FALSE NA
    cluster34 XANPAGTX0501_009189-T1 NA TRUE NA
    cluster34 XANPAGTX0501_009193-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE NA
    cluster34 XANPAGTX0501_009203-T1 IPR001077 O-methyltransferase domain, IPR016461 O-methyltransferase COMT-type, IPR029063 S-adenosyl-L-methionine-dependent methyltransferase superfamily, IPR036388 Winged helix-like DNA-binding domain superfamily, IPR036390 Winged helix DNA-binding domain superfamily FALSE NA
    cluster34 XANPAGTX0501_009205-T1 NA FALSE NA
    cluster34 XANPAGTX0501_009195-T1 NA FALSE NA
    cluster35 XANPAGTX0501_000971-T1 IPR011009 Protein kinase-like domain superfamily FALSE NA
    cluster35 XANPAGTX0501_000972-T1 NA FALSE NA
    cluster35 XANPAGTX0501_000974-T1 IPR000573 Aconitase A/isopropylmalate dehydratase small subunit, swivel domain, IPR001030 Aconitase/3-isopropylmalate dehydratase large subunit, alpha/beta/alpha domain, IPR004418 Homoaconitase, mitochondrial, IPR015928 Aconitase/3-isopropylmalate dehydratase, swivel, IPR015931 Aconitase/3-isopropylmalate dehydratase large subunit, alpha/beta/alpha, subdomain 1/3, IPR018136 Aconitase family, 4Fe-4S cluster binding site, IPR036008 Aconitase, iron-sulfur domain, IPR039386 Homoaconitase, swivel domain FALSE NA
    cluster35 XANPAGTX0501_000978-T1 NA FALSE NA
    cluster35 XANPAGTX0501_000973-T1 IPR000192 Aminotransferase class V domain, IPR015421 Pyridoxal phosphate-dependent transferase, major domain, IPR015422 Pyridoxal phosphate-dependent transferase, small domain, IPR015424 Pyridoxal phosphate-dependent transferase, IPR022278 Phosphoserine aminotransferase FALSE NA
    cluster36 XANPAGTX0501_001015-T1 IPR001153 Barwin domain, IPR009009 RlpA-like protein, double-psi beta-barrel domain, IPR036908 RlpA-like domain superfamily TRUE NA
    cluster36 XANPAGTX0501_001008-T1 NA FALSE NA
    cluster36 XANPAGTX0501_001017-T1 IPR007111 NACHT nucleoside triphosphatase, IPR024983 CHAT domain, IPR027417 P-loop containing nucleoside triphosphate hydrolase FALSE NA
    cluster36 XANPAGTX0501_001010-T1 NA FALSE NA
    cluster36 XANPAGTX0501_001014-T1 NA FALSE NA
    cluster37 XANPAGTX0501_001063-T1 NA TRUE Cluster_1
    cluster37 XANPAGTX0501_001064-T1 IPR021765 Mycotoxin biosynthesis protein UstYa-like FALSE Cluster_1
    cluster37 XANPAGTX0501_001062-T1 NA FALSE Cluster_1
    cluster37 XANPAGTX0501_001061-T1 IPR002328 Alcohol dehydrogenase, zinc-type, conserved site, IPR011032 GroES-like superfamily, IPR013149 Alcohol dehydrogenase-like, C-terminal, IPR013154 Alcohol dehydrogenase-like, N-terminal, IPR020843 Polyketide synthase, enoylreductase domain, IPR036291 NAD(P)-binding domain superfamily FALSE Cluster_1
    cluster38 XANPAGTX0501_001178-T1 NA FALSE NA
    cluster38 XANPAGTX0501_001183-T1 NA TRUE NA
    cluster38 XANPAGTX0501_001184-T1 NA FALSE NA
    cluster38 XANPAGTX0501_001185-T1 NA FALSE NA
    cluster39 XANPAGTX0501_001458-T1 IPR002889 Carbohydrate-binding WSC, IPR018535 Domain of unknown function DUF1996 TRUE NA
    cluster39 XANPAGTX0501_001455-T1 IPR005556 SUN family TRUE NA
    cluster39 XANPAGTX0501_001453-T1 IPR010255 Haem peroxidase superfamily, IPR019791 Haem peroxidase, animal-type, IPR034812 Psi-factor producing oxygenase, N-terminal heme peroxidase domain, IPR036396 Cytochrome P450 superfamily, IPR037120 Haem peroxidase domain superfamily, animal type FALSE NA
    cluster39 XANPAGTX0501_001462-T1 IPR008775 Phytanoyl-CoA dioxygenase-like FALSE NA
    cluster39 XANPAGTX0501_001452-T1 IPR018712 Domain of unknown function DUF2235 FALSE NA
    cluster39 XANPAGTX0501_001451-T1 NA FALSE NA
    cluster39 XANPAGTX0501_001465-T1 NA FALSE NA
    cluster39 XANPAGTX0501_001448-T1 IPR011701 Major facilitator superfamily, IPR036259 MFS transporter superfamily FALSE NA
    cluster40 XANPAGTX0501_001497-T1 NA FALSE NA
    cluster40 XANPAGTX0501_001498-T1 IPR012340 Nucleic acid-binding, OB-fold FALSE NA
    cluster40 XANPAGTX0501_001496-T1 NA FALSE NA
    cluster41 XANPAGTX0501_001710-T1 IPR007577 Glycosyltransferase, DXD sugar-binding motif, IPR029044 Nucleotide-diphospho-sugar transferases FALSE NA
    cluster41 XANPAGTX0501_001712-T1 IPR001227 Acyl transferase domain superfamily, IPR006162 Phosphopantetheine attachment site, IPR009081 Phosphopantetheine binding ACP domain, IPR011032 GroES-like superfamily, IPR013154 Alcohol dehydrogenase-like, N-terminal, IPR013216 Methyltransferase type 11, IPR013217 Methyltransferase type 12, IPR013968 Polyketide synthase, ketoreductase domain, IPR014030 Beta-ketoacyl synthase, N-terminal, IPR014031 Beta-ketoacyl synthase, C-terminal, IPR014043 Acyl transferase, IPR016035 Acyl transferase/acyl hydrolase/lysophospholipase, IPR016036 Malonyl-CoA ACP transacylase, ACP-binding, IPR016039 Thiolase-like, IPR018201 Beta-ketoacyl synthase, active site, IPR020801 None, IPR020806 Polyketide synthase, phosphopantetheine-binding domain, IPR020807 Polyketide synthase, dehydratase domain, IPR020841 Polyketide synthase, beta-ketoacyl synthase domain, IPR020843 Polyketide synthase, enoylreductase domain, IPR029063 S-adenosyl-L-methionine-dependent methyltransferase superfamily, IPR032821 Polyketide synthase, C-terminal extension, IPR036291 NAD(P)-binding domain superfamily, IPR036736 ACP-like superfamily, IPR042104 Polyketide synthase, dehydratase domain superfamily FALSE NA
    cluster41 XANPAGTX0501_010750-T1 NA FALSE NA
    cluster41 XANPAGTX0501_001713-T1 IPR000073 Alpha/beta hydrolase fold-1, IPR005152 Lipase, secreted, IPR022742 Serine aminopeptidase, S33, IPR029058 Alpha/Beta hydrolase fold FALSE NA
    cluster41 XANPAGTX0501_001708-T1 NA TRUE NA
    cluster42 XANPAGTX0501_009317-T1 IPR008979 Galactose-binding-like domain superfamily TRUE NA
    cluster42 XANPAGTX0501_009340-T1 NA TRUE NA
    cluster42 XANPAGTX0501_009318-T1 IPR011333 SKP1/BTB/POZ domain superfamily FALSE NA
    cluster42 XANPAGTX0501_009342-T1 NA FALSE NA
    cluster42 XANPAGTX0501_009341-T1 NA FALSE NA
    cluster42 XANPAGTX0501_009322-T1 NA FALSE NA
    cluster42 XANPAGTX0501_009326-T1 NA FALSE NA
    cluster42 XANPAGTX0501_009321-T1 NA FALSE NA
    cluster42 XANPAGTX0501_009316-T1 IPR001810 F-box domain, IPR036047 F-box-like domain superfamily FALSE NA
    cluster42 XANPAGTX0501_009311-T1 IPR008427 Extracellular membrane protein, CFEM domain TRUE NA
    cluster42 XANPAGTX0501_009320-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE NA
    cluster42 XANPAGTX0501_009310-T1 IPR032805 Wax synthase domain FALSE NA
    cluster42 XANPAGTX0501_009331-T1 NA FALSE NA
    cluster42 XANPAGTX0501_009332-T1 NA FALSE NA
    cluster43 XANPAGTX0501_009435-T1 NA FALSE NA
    cluster43 XANPAGTX0501_009437-T1 NA FALSE NA
    cluster43 XANPAGTX0501_009438-T1 NA FALSE NA
    cluster43 XANPAGTX0501_009439-T1 NA FALSE NA
    cluster43 XANPAGTX0501_009436-T1 IPR000330 SNF2, N-terminal, IPR001650 Helicase, C-terminal, IPR001841 Zinc finger, RING-type, IPR013083 Zinc finger, RING/FYVE/PHD-type, IPR014001 Helicase superfamily 1/2, ATP-binding domain, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR038718 SNF2-like, N-terminal domain superfamily FALSE NA
    cluster44 XANPAGTX0501_009558-T1 IPR000719 Protein kinase domain, IPR011009 Protein kinase-like domain superfamily, IPR036465 von Willebrand factor A-like domain superfamily FALSE NA
    cluster44 XANPAGTX0501_009557-T1 IPR003615 HNH nuclease FALSE NA
    cluster44 XANPAGTX0501_009566-T1 NA FALSE NA
    cluster44 XANPAGTX0501_009555-T1 NA FALSE NA
    cluster44 XANPAGTX0501_009559-T1 NA FALSE NA
    cluster45 XANPAGTX0501_009670-T1 IPR000845 Nucleoside phosphorylase domain, IPR002110 Ankyrin repeat, IPR007111 NACHT nucleoside triphosphatase, IPR020683 Domain of unknown function DUF3447, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR035994 Nucleoside phosphorylase superfamily, IPR036770 Ankyrin repeat-containing domain superfamily FALSE NA
    cluster45 XANPAGTX0501_009672-T1 NA FALSE NA
    cluster45 XANPAGTX0501_009671-T1 NA FALSE NA
    cluster45 XANPAGTX0501_009668-T1 NA TRUE NA
    cluster45 XANPAGTX0501_009673-T1 IPR003593 AAA+ ATPase domain, IPR003959 ATPase, AAA-type, core, IPR027417 P-loop containing nucleoside triphosphate hydrolase FALSE NA
    cluster45 XANPAGTX0501_009669-T1 IPR003689 Zinc/iron permease FALSE NA
    cluster45 XANPAGTX0501_009666-T1 NA FALSE NA
    cluster46 XANPAGTX0501_009688-T1 IPR000254 Cellulose-binding domain, fungal, IPR035971 Cellulose-binding domain superfamily, IPR036908 RlpA-like domain superfamily TRUE NA
    cluster46 XANPAGTX0501_009689-T1 NA TRUE NA
    cluster46 XANPAGTX0501_009690-T1 NA TRUE NA
    cluster47 XANPAGTX0501_009730-T1 NA TRUE Cluster_1
    cluster47 XANPAGTX0501_009731-T1 IPR005197 Glycoside hydrolase family 71 TRUE Cluster_1
    cluster47 XANPAGTX0501_009734-T1 NA FALSE Cluster_1
    cluster47 XANPAGTX0501_009738-T1 IPR001810 F-box domain, IPR032675 Leucine-rich repeat domain superfamily, IPR036047 F-box-like domain superfamily FALSE NA
    cluster48 XANPAGTX0501_009872-T1 NA FALSE Cluster_1
    cluster48 XANPAGTX0501_009876-T1 IPR029063 S-adenosyl-L-methionine-dependent methyltransferase superfamily FALSE NA
    cluster48 XANPAGTX0501_009883-T1 NA FALSE NA
    cluster48 XANPAGTX0501_009887-T1 NA TRUE NA
    cluster48 XANPAGTX0501_009875-T1 NA FALSE NA
    cluster48 XANPAGTX0501_009886-T1 NA TRUE NA
    cluster48 XANPAGTX0501_009881-T2 IPR004212 GTF2I-like repeat FALSE NA
    cluster48 XANPAGTX0501_009873-T1 NA FALSE Cluster_1
    cluster49 XANPAGTX0501_010016-T2 IPR000172 Glucose-methanol-choline oxidoreductase, N-terminal, IPR007867 Glucose-methanol-choline oxidoreductase, C-terminal, IPR012132 Glucose-methanol-choline oxidoreductase, IPR036188 FAD/NAD(P)-binding domain superfamily FALSE NA
    cluster49 XANPAGTX0501_010022-T1 NA TRUE NA
    cluster49 XANPAGTX0501_010026-T1 IPR018392 LysM domain, IPR036779 LysM domain superfamily TRUE NA
    cluster49 XANPAGTX0501_010021-T1 NA TRUE NA
    cluster49 XANPAGTX0501_010016-T1 IPR000172 Glucose-methanol-choline oxidoreductase, N-terminal, IPR007867 Glucose-methanol-choline oxidoreductase, C-terminal, IPR012132 Glucose-methanol-choline oxidoreductase, IPR036188 FAD/NAD(P)-binding domain superfamily FALSE NA
    cluster49 XANPAGTX0501_010023-T1 NA FALSE NA
    cluster49 XANPAGTX0501_010027-T1 NA TRUE NA
    cluster50 XANPAGTX0501_010035-T1 IPR002922 Thiazole biosynthetic enzyme Thi4 family, IPR027495 Thiamine thiazole synthase, IPR036188 FAD/NAD(P)-binding domain superfamily FALSE NA
    cluster50 XANPAGTX0501_010040-T1 NA TRUE NA
    cluster50 XANPAGTX0501_010044-T1 IPR002937 Amine oxidase, IPR036188 FAD/NAD(P)-binding domain superfamily TRUE NA
    cluster50 XANPAGTX0501_010038-T3 IPR000581 Dihydroxy-acid/6-phosphogluconate dehydratase, IPR004404 Dihydroxy-acid dehydratase, IPR020558 Dihydroxy-acid/6-phosphogluconate dehydratase, conserved site, IPR037237 IlvD/EDD, N-terminal domain, IPR042096 Dihydroxy-acid dehydratase, C-terminal FALSE NA
    cluster50 XANPAGTX0501_010045-T1 IPR002937 Amine oxidase, IPR036188 FAD/NAD(P)-binding domain superfamily FALSE NA
    cluster50 XANPAGTX0501_010039-T1 NA FALSE NA
    cluster51 XANPAGTX0501_001936-T1 NA FALSE NA
    cluster51 XANPAGTX0501_001926-T1 NA FALSE NA
    cluster51 XANPAGTX0501_001921-T1 IPR025714 Methyltransferase domain, IPR029063 S-adenosyl-L-methionine-dependent methyltransferase superfamily, IPR041698 Methyltransferase domain 25 FALSE NA
    cluster51 XANPAGTX0501_001930-T1 NA FALSE NA
    cluster51 XANPAGTX0501_001937-T1 NA FALSE NA
    cluster51 XANPAGTX0501_001927-T1 NA FALSE NA
    cluster51 XANPAGTX0501_001935-T1 NA TRUE NA
    cluster52 XANPAGTX0501_001959-T1 IPR000095 CRIB domain, IPR000719 Protein kinase domain, IPR008271 Serine/threonine-protein kinase, active site, IPR011009 Protein kinase-like domain superfamily, IPR017441 Protein kinase, ATP binding site, IPR033923 p21 activated kinase binding domain, IPR036936 CRIB domain superfamily FALSE NA
    cluster52 XANPAGTX0501_001967-T1 NA FALSE Cluster_1
    cluster52 XANPAGTX0501_001954-T1 NA FALSE NA
    cluster52 XANPAGTX0501_001965-T1 NA FALSE Cluster_1
    cluster52 XANPAGTX0501_001970-T1 NA TRUE Cluster_1
    cluster52 XANPAGTX0501_001963-T1 NA FALSE Cluster_1
    cluster52 XANPAGTX0501_001956-T1 NA FALSE NA
    cluster52 XANPAGTX0501_001962-T1 NA FALSE Cluster_1
    cluster52 XANPAGTX0501_001953-T1 NA FALSE NA
    cluster52 XANPAGTX0501_001961-T1 NA FALSE Cluster_1
    cluster52 XANPAGTX0501_001966-T1 NA FALSE Cluster_1
    cluster52 XANPAGTX0501_001964-T1 IPR001611 Leucine-rich repeat, IPR003591 Leucine-rich repeat, typical subtype, IPR019487 RAM signalling pathway, SOG2, IPR032675 Leucine-rich repeat domain superfamily FALSE Cluster_1
    cluster52 XANPAGTX0501_001955-T1 IPR001138 Zn(2)Cys(6) fungal-type DNA-binding domain, IPR001451 Hexapeptide repeat, IPR002110 Ankyrin repeat, IPR011004 Trimeric LpxA-like superfamily, IPR018357 Hexapeptide transferase, conserved site, IPR020683 Domain of unknown function DUF3447, IPR024688 Maltose/galactoside acetyltransferase, IPR036770 Ankyrin repeat-containing domain superfamily, IPR036864 Zn(2)-C6 fungal-type DNA-binding domain superfamily FALSE NA
    cluster52 XANPAGTX0501_001958-T1 IPR001356 Homeobox domain, IPR009057 Homeobox-like domain superfamily, IPR017970 Homeobox, conserved site FALSE NA
    cluster52 XANPAGTX0501_001960-T1 NA FALSE Cluster_1
    cluster52 XANPAGTX0501_001957-T1 NA FALSE NA
    cluster53 XANPAGTX0501_002182-T1 NA FALSE Cluster_3
    cluster53 XANPAGTX0501_002186-T1 NA TRUE Cluster_3
    cluster53 XANPAGTX0501_002184-T1 NA FALSE Cluster_3
    cluster53 XANPAGTX0501_002190-T1 IPR001425 Archaeal/bacterial/fungal rhodopsins, IPR018229 Rhodopsin, retinal binding site FALSE Cluster_3
    cluster53 XANPAGTX0501_002191-T1 NA TRUE Cluster_3
    cluster53 XANPAGTX0501_002185-T1 NA FALSE Cluster_3
    cluster53 XANPAGTX0501_002183-T1 NA FALSE Cluster_3
    cluster54 XANPAGTX0501_002355-T1 NA FALSE NA
    cluster54 XANPAGTX0501_002358-T1 NA FALSE NA
    cluster54 XANPAGTX0501_002357-T1 NA FALSE NA
    cluster54 XANPAGTX0501_002359-T1 NA FALSE NA
    cluster54 XANPAGTX0501_002356-T1 IPR006140 D-isomer specific 2-hydroxyacid dehydrogenase, NAD-binding domain, IPR029752 D-isomer specific 2-hydroxyacid dehydrogenase, NAD-binding domain conserved site 1, IPR029753 D-isomer specific 2-hydroxyacid dehydrogenase, NAD-binding domain conserved site, IPR036291 NAD(P)-binding domain superfamily FALSE NA
    cluster55 XANPAGTX0501_002410-T1 NA FALSE NA
    cluster55 XANPAGTX0501_002409-T1 IPR001338 Hydrophobin TRUE NA
    cluster55 XANPAGTX0501_002412-T2 NA TRUE NA
    cluster55 XANPAGTX0501_002411-T1 NA FALSE NA
    cluster55 XANPAGTX0501_002412-T1 NA TRUE NA
    cluster55 XANPAGTX0501_002408-T1 NA FALSE NA
    cluster56 XANPAGTX0501_002441-T1 NA FALSE NA
    cluster56 XANPAGTX0501_002437-T1 IPR001077 O-methyltransferase domain, IPR016461 O-methyltransferase COMT-type, IPR029063 S-adenosyl-L-methionine-dependent methyltransferase superfamily, IPR036388 Winged helix-like DNA-binding domain superfamily, IPR036390 Winged helix DNA-binding domain superfamily FALSE NA
    cluster56 XANPAGTX0501_002427-T1 IPR001138 Zn(2)Cys(6) fungal-type DNA-binding domain, IPR036864 Zn(2)-C6 fungal-type DNA-binding domain superfamily FALSE NA
    cluster56 XANPAGTX0501_002433-T1 NA FALSE NA
    cluster56 XANPAGTX0501_002440-T1 NA FALSE NA
    cluster56 XANPAGTX0501_002435-T1 IPR002110 Ankyrin repeat, IPR020683 Domain of unknown function DUF3447, IPR036770 Ankyrin repeat-containing domain superfamily FALSE NA
    cluster56 XANPAGTX0501_002428-T1 NA FALSE NA
    cluster56 XANPAGTX0501_002434-T1 IPR002110 Ankyrin repeat, IPR020683 Domain of unknown function DUF3447, IPR036770 Ankyrin repeat-containing domain superfamily FALSE NA
    cluster56 XANPAGTX0501_002429-T1 IPR001077 O-methyltransferase domain, IPR016461 O-methyltransferase COMT-type, IPR029063 S-adenosyl-L-methionine-dependent methyltransferase superfamily, IPR036388 Winged helix-like DNA-binding domain superfamily, IPR036390 Winged helix DNA-binding domain superfamily FALSE NA
    cluster56 XANPAGTX0501_002436-T1 IPR002110 Ankyrin repeat, IPR003593 AAA+ ATPase domain, IPR007111 NACHT nucleoside triphosphatase, IPR020683 Domain of unknown function DUF3447, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR031348 Fungal N-terminal domain of STAND protein, IPR036770 Ankyrin repeat-containing domain superfamily FALSE NA
    cluster56 XANPAGTX0501_002439-T1 NA FALSE NA
    cluster56 XANPAGTX0501_002424-T1 NA FALSE NA
    cluster57 XANPAGTX0501_010180-T1 NA FALSE NA
    cluster57 XANPAGTX0501_010175-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE NA
    cluster57 XANPAGTX0501_010169-T1 IPR035992 Ricin B-like lectins FALSE NA
    cluster57 XANPAGTX0501_010173-T1 NA FALSE NA
    cluster57 XANPAGTX0501_010171-T1 NA FALSE NA
    cluster57 XANPAGTX0501_010172-T1 IPR010730 Heterokaryon incompatibility FALSE NA
    cluster57 XANPAGTX0501_010174-T1 NA FALSE NA
    cluster57 XANPAGTX0501_010170-T1 IPR000772 Ricin B, lectin domain, IPR035992 Ricin B-like lectins TRUE NA
    cluster58 XANPAGTX0501_010244-T1 IPR000182 GNAT domain, IPR016181 Acyl-CoA N-acyltransferase FALSE NA
    cluster58 XANPAGTX0501_010248-T1 IPR000073 Alpha/beta hydrolase fold-1, IPR029058 Alpha/Beta hydrolase fold FALSE NA
    cluster58 XANPAGTX0501_010247-T1 NA TRUE NA
    cluster58 XANPAGTX0501_010250-T1 NA TRUE NA
    cluster59 XANPAGTX0501_010478-T1 IPR000589 Ribosomal protein S15, IPR009068 S15/NS1, RNA-binding FALSE NA
    cluster59 XANPAGTX0501_010475-T1 IPR007111 NACHT nucleoside triphosphatase, IPR027417 P-loop containing nucleoside triphosphate hydrolase FALSE NA
    cluster59 XANPAGTX0501_010476-T1 NA FALSE NA
    cluster59 XANPAGTX0501_010479-T1 NA FALSE NA
    cluster60 XANPAGTX0501_010557-T1 IPR000073 Alpha/beta hydrolase fold-1, IPR022742 Serine aminopeptidase, S33, IPR029058 Alpha/Beta hydrolase fold FALSE NA
    cluster60 XANPAGTX0501_010558-T1 IPR000719 Protein kinase domain, IPR008266 Tyrosine-protein kinase, active site, IPR011009 Protein kinase-like domain superfamily, IPR017441 Protein kinase, ATP binding site FALSE NA
    cluster60 XANPAGTX0501_010559-T1 NA FALSE NA
    cluster60 XANPAGTX0501_010556-T1 IPR000719 Protein kinase domain, IPR008271 Serine/threonine-protein kinase, active site, IPR011009 Protein kinase-like domain superfamily FALSE NA
    cluster60 XANPAGTX0501_010551-T1 IPR006094 FAD linked oxidase, N-terminal, IPR016166 FAD-binding domain, PCMH-type, IPR016169 FAD-binding, type PCMH, subdomain 2, IPR036318 FAD-binding, type PCMH-like superfamily FALSE NA
    cluster60 XANPAGTX0501_010552-T1 NA FALSE NA
    cluster61 XANPAGTX0501_010572-T1 NA FALSE NA
    cluster61 XANPAGTX0501_010571-T1 IPR029498 Prion-inhibition and propagation, HeLo domain, IPR038305 HeLo domain superfamily FALSE NA
    cluster61 XANPAGTX0501_010570-T1 IPR002182 NB-ARC, IPR010730 Heterokaryon incompatibility, IPR011990 Tetratricopeptide-like helical domain superfamily, IPR013026 None, IPR019734 Tetratricopeptide repeat, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR031352 NACHT-NTPase and P-loop NTPases, N-terminal domain, IPR041664 Orc1-like, AAA ATPase domain FALSE NA
    cluster62 XANPAGTX0501_002486-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE Cluster_2
    cluster62 XANPAGTX0501_002483-T3 IPR007229 Nicotinate phosphoribosyltransferase family, IPR036068 Nicotinate phosphoribosyltransferase-like, C-terminal, IPR040727 Nicotinate phosphoribosyltransferase, N-terminal domain, IPR041525 Nicotinate/nicotinamide phosphoribosyltransferase FALSE Cluster_2
    cluster62 XANPAGTX0501_002491-T1 IPR021848 Haem-dependent oxidative N-demethylase, alpha subunit-like FALSE Cluster_2
    cluster62 XANPAGTX0501_002488-T1 IPR000873 AMP-dependent synthetase/ligase domain, IPR009081 Phosphopantetheine binding ACP domain, IPR010071 Amino acid adenylation domain, IPR010080 Thioester reductase-like domain, IPR013120 Fatty acyl-coenzyme A reductase, NAD-binding domain, IPR020845 AMP-binding, conserved site, IPR036291 NAD(P)-binding domain superfamily, IPR036736 ACP-like superfamily, IPR042099 ANL, N-terminal domain FALSE Cluster_2
    cluster62 XANPAGTX0501_002487-T1 IPR001585 Transaldolase/Fructose-6-phosphate aldolase, IPR013785 Aldolase-type TIM barrel, IPR018225 Transaldolase, active site FALSE Cluster_2
    cluster62 XANPAGTX0501_002490-T1 IPR036291 NAD(P)-binding domain superfamily FALSE Cluster_2
    cluster63 XANPAGTX0501_002503-T1 NA FALSE NA
    cluster63 XANPAGTX0501_002505-T1 NA FALSE NA
    cluster63 XANPAGTX0501_002504-T1 IPR008441 AfumC-like glycosyltransferase, IPR029044 Nucleotide-diphospho-sugar transferases FALSE NA
    cluster63 XANPAGTX0501_002506-T1 NA FALSE NA
    cluster63 XANPAGTX0501_002514-T1 NA TRUE NA
    cluster64 XANPAGTX0501_002562-T1 IPR004045 Glutathione S-transferase, N-terminal, IPR004046 Glutathione S-transferase, C-terminal, IPR010987 Glutathione S-transferase, C-terminal-like, IPR036249 Thioredoxin-like superfamily, IPR036282 Glutathione S-transferase, C-terminal domain superfamily, IPR040079 Glutathione transferase family FALSE NA
    cluster64 XANPAGTX0501_002566-T1 IPR002893 Zinc finger, MYND-type, IPR027974 Domain of unknown function DUF4470 FALSE NA
    cluster64 XANPAGTX0501_002565-T1 NA TRUE NA
    cluster65 XANPAGTX0501_002780-T1 IPR000210 BTB/POZ domain, IPR011333 SKP1/BTB/POZ domain superfamily FALSE NA
    cluster65 XANPAGTX0501_002783-T1 NA FALSE NA
    cluster65 XANPAGTX0501_002791-T1 IPR024079 Metallopeptidase, catalytic domain superfamily FALSE NA
    cluster65 XANPAGTX0501_002792-T1 NA FALSE NA
    cluster66 XANPAGTX0501_002946-T1 NA FALSE NA
    cluster66 XANPAGTX0501_002943-T1 NA FALSE NA
    cluster66 XANPAGTX0501_002947-T1 IPR000330 SNF2, N-terminal, IPR001650 Helicase, C-terminal, IPR001841 Zinc finger, RING-type, IPR013083 Zinc finger, RING/FYVE/PHD-type, IPR014001 Helicase superfamily 1/2, ATP-binding domain, IPR017907 Zinc finger, RING-type, conserved site, IPR018957 Zinc finger, C3HC4 RING-type, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR038718 SNF2-like, N-terminal domain superfamily FALSE NA
    cluster67 XANPAGTX0501_002961-T1 NA FALSE NA
    cluster67 XANPAGTX0501_002962-T1 NA FALSE NA
    cluster67 XANPAGTX0501_002967-T1 NA FALSE NA
    cluster68 XANPAGTX0501_010596-T1 NA FALSE NA
    cluster68 XANPAGTX0501_010597-T1 NA FALSE NA
    cluster68 XANPAGTX0501_010598-T1 NA FALSE NA
    cluster68 XANPAGTX0501_010603-T1 NA FALSE NA
    cluster68 XANPAGTX0501_010604-T1 IPR002110 Ankyrin repeat, IPR020683 Domain of unknown function DUF3447, IPR036770 Ankyrin repeat-containing domain superfamily FALSE NA
    cluster68 XANPAGTX0501_010605-T1 IPR002110 Ankyrin repeat, IPR020683 Domain of unknown function DUF3447, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR036770 Ankyrin repeat-containing domain superfamily FALSE NA
    cluster68 XANPAGTX0501_010606-T1 IPR002110 Ankyrin repeat, IPR020683 Domain of unknown function DUF3447, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR036770 Ankyrin repeat-containing domain superfamily FALSE NA
    cluster68 XANPAGTX0501_010607-T1 IPR002110 Ankyrin repeat, IPR003593 AAA+ ATPase domain, IPR007111 NACHT nucleoside triphosphatase, IPR020683 Domain of unknown function DUF3447, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR031348 Fungal N-terminal domain of STAND protein, IPR036770 Ankyrin repeat-containing domain superfamily FALSE NA
    cluster68 XANPAGTX0501_010608-T1 IPR002110 Ankyrin repeat, IPR003593 AAA+ ATPase domain, IPR007111 NACHT nucleoside triphosphatase, IPR020683 Domain of unknown function DUF3447, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR031348 Fungal N-terminal domain of STAND protein, IPR036770 Ankyrin repeat-containing domain superfamily FALSE NA
    cluster69 XANPAGTX0501_003173-T1 NA FALSE Cluster_1
    cluster69 XANPAGTX0501_003171-T1 NA FALSE Cluster_1
    cluster69 XANPAGTX0501_003172-T1 NA FALSE Cluster_1
    cluster70 XANPAGTX0501_003213-T1 IPR001810 F-box domain, IPR032675 Leucine-rich repeat domain superfamily, IPR036047 F-box-like domain superfamily FALSE NA
    cluster70 XANPAGTX0501_003198-T1 IPR022085 Oxopyrrolidines biosynthesis cluster protein G FALSE Cluster_2
    cluster70 XANPAGTX0501_003205-T1 IPR001128 Cytochrome P450, IPR002401 Cytochrome P450, E-class, group I, IPR017972 Cytochrome P450, conserved site, IPR036396 Cytochrome P450 superfamily FALSE NA
    cluster70 XANPAGTX0501_003212-T1 IPR001810 F-box domain, IPR032675 Leucine-rich repeat domain superfamily, IPR036047 F-box-like domain superfamily FALSE NA
    cluster70 XANPAGTX0501_003200-T1 NA FALSE Cluster_2
    cluster70 XANPAGTX0501_003211-T1 NA FALSE NA
    cluster70 XANPAGTX0501_003199-T1 IPR017946 PLC-like phosphodiesterase, TIM beta/alpha-barrel domain superfamily, IPR039559 Altered inheritance of mitochondria protein 6, PI-PLC-like catalytic domain FALSE Cluster_2
    cluster71 XANPAGTX0501_003289-T1 NA FALSE Cluster_4
    cluster71 XANPAGTX0501_003282-T1 NA FALSE NA
    cluster71 XANPAGTX0501_003283-T1 NA FALSE NA
    cluster71 XANPAGTX0501_003285-T1 IPR008758 Peptidase S28, IPR029058 Alpha/Beta hydrolase fold, IPR042269 Serine carboxypeptidase S28, SKS domain TRUE NA
    cluster71 XANPAGTX0501_003287-T1 NA TRUE Cluster_4
    cluster72 XANPAGTX0501_003714-T1 IPR001077 O-methyltransferase domain, IPR016461 O-methyltransferase COMT-type, IPR029063 S-adenosyl-L-methionine-dependent methyltransferase superfamily, IPR036388 Winged helix-like DNA-binding domain superfamily, IPR036390 Winged helix DNA-binding domain superfamily FALSE NA
    cluster72 XANPAGTX0501_003715-T1 NA FALSE NA
    cluster72 XANPAGTX0501_003723-T1 NA FALSE NA
    cluster73 XANPAGTX0501_003766-T1 NA TRUE NA
    cluster73 XANPAGTX0501_003775-T2 IPR003100 PAZ domain, IPR003165 Piwi domain, IPR012337 Ribonuclease H-like superfamily, IPR014811 Argonaute, linker 1 domain, IPR032472 Argonaute linker 2 domain, IPR032473 Protein argonaute, Mid domain, IPR032474 Protein argonaute, N-terminal, IPR036085 PAZ domain superfamily, IPR036397 Ribonuclease H superfamily FALSE NA
    cluster73 XANPAGTX0501_003775-T1 IPR003100 PAZ domain, IPR003165 Piwi domain, IPR012337 Ribonuclease H-like superfamily, IPR014811 Argonaute, linker 1 domain, IPR032472 Argonaute linker 2 domain, IPR032473 Protein argonaute, Mid domain, IPR032474 Protein argonaute, N-terminal, IPR036085 PAZ domain superfamily, IPR036397 Ribonuclease H superfamily FALSE NA
    cluster73 XANPAGTX0501_003767-T1 NA FALSE NA
    cluster73 XANPAGTX0501_003765-T1 IPR000420 Yeast PIR protein repeat TRUE NA
    cluster74 XANPAGTX0501_004003-T1 NA FALSE NA
    cluster74 XANPAGTX0501_004008-T1 NA FALSE NA
    cluster74 XANPAGTX0501_004006-T1 IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR041677 DNA2/NAM7 helicase, helicase domain, IPR041679 DNA2/NAM7 helicase-like, C-terminal FALSE NA
    cluster75 XANPAGTX0501_004414-T1 IPR027417 P-loop containing nucleoside triphosphate hydrolase FALSE Cluster_2
    cluster75 XANPAGTX0501_004416-T1 IPR000246 Peptidase T2, asparaginase 2, IPR029055 Nucleophile aminohydrolases, N-terminal, IPR037464 Threonine aspartase 1 FALSE Cluster_2
    cluster75 XANPAGTX0501_004407-T1 IPR002925 Dienelactone hydrolase, IPR029058 Alpha/Beta hydrolase fold FALSE Cluster_2
    cluster75 XANPAGTX0501_004415-T1 NA FALSE Cluster_2
    cluster76 XANPAGTX0501_004429-T1 NA FALSE NA
    cluster76 XANPAGTX0501_004431-T1 NA FALSE NA
    cluster76 XANPAGTX0501_004430-T1 NA FALSE NA
    cluster76 XANPAGTX0501_004428-T1 IPR004695 Transporter protein SLAC1/Mae1/ Ssu1/TehA, IPR030185 Malic acid transport protein, IPR038665 Voltage-dependent anion channel superfamily FALSE NA
    cluster77 XANPAGTX0501_004520-T1 NA FALSE NA
    cluster77 XANPAGTX0501_004519-T1 NA FALSE NA
    cluster77 XANPAGTX0501_004521-T1 IPR008271 Serine/threonine-protein kinase, active site, IPR011009 Protein kinase-like domain superfamily FALSE NA
    cluster77 XANPAGTX0501_004530-T1 NA FALSE NA
    cluster77 XANPAGTX0501_004515-T1 IPR032710 NTF2-like domain superfamily, IPR037401 SnoaL-like domain TRUE NA
    cluster77 XANPAGTX0501_004525-T1 IPR000719 Protein kinase domain, IPR008271 Serine/threonine-protein kinase, active site, IPR011009 Protein kinase-like domain superfamily FALSE NA
    cluster78 XANPAGTX0501_004956-T1 NA TRUE NA
    cluster78 XANPAGTX0501_004961-T1 IPR002110 Ankyrin repeat, IPR020683 Domain of unknown function DUF3447, IPR031348 Fungal N-terminal domain of STAND protein, IPR036770 Ankyrin repeat-containing domain superfamily FALSE NA
    cluster78 XANPAGTX0501_004963-T1 IPR001915 Peptidase M48 FALSE NA
    cluster78 XANPAGTX0501_004965-T1 NA FALSE NA
    cluster78 XANPAGTX0501_004966-T1 NA FALSE NA
    cluster79 XANPAGTX0501_004998-T1 IPR000182 GNAT domain, IPR016181 Acyl-CoA N-acyltransferase FALSE NA
    cluster79 XANPAGTX0501_004997-T1 NA FALSE NA
    cluster79 XANPAGTX0501_004983-T1 NA TRUE NA
    cluster79 XANPAGTX0501_004996-T1 IPR002293 Amino acid/polyamine transporter I FALSE NA
    cluster79 XANPAGTX0501_004991-T1 NA FALSE NA
    cluster79 XANPAGTX0501_004992-T1 IPR013087 Zinc finger C2H2-type FALSE NA
    cluster79 XANPAGTX0501_004990-T1 NA FALSE NA
    cluster80 XANPAGTX0501_005026-T1 NA FALSE NA
    cluster80 XANPAGTX0501_005021-T1 NA FALSE NA
    cluster80 XANPAGTX0501_005022-T1 NA FALSE NA
    cluster80 XANPAGTX0501_005029-T1 NA FALSE NA
    cluster80 XANPAGTX0501_005027-T1 IPR003370 Chromate transporter, IPR014047 Chromate transporter, long chain FALSE NA
    cluster80 XANPAGTX0501_005033-T1 NA FALSE NA
    cluster80 XANPAGTX0501_005028-T1 NA FALSE NA
    cluster80 XANPAGTX0501_005025-T1 NA FALSE NA
    cluster81 XANPAGTX0501_005082-T1 IPR000537 UbiA prenyltransferase family FALSE Cluster_3
    cluster81 XANPAGTX0501_005081-T1 NA FALSE Cluster_3
    cluster81 XANPAGTX0501_005084-T1 IPR006094 FAD linked oxidase, N-terminal, IPR012951 Berberine/berberine-like, IPR016166 FAD-binding domain, PCMH-type, IPR036318 FAD-binding, type PCMH-like superfamily FALSE Cluster_3
    cluster81 XANPAGTX0501_005085-T1 NA FALSE Cluster_3
    cluster81 XANPAGTX0501_005076-T1 IPR000092 Polyprenyl synthetase, IPR008949 Isoprenoid synthase domain superfamily, IPR033749 Polyprenyl synthetase, conserved site FALSE Cluster_3
    cluster81 XANPAGTX0501_005074-T1 NA FALSE Cluster_3
    cluster81 XANPAGTX0501_005083-T1 IPR000073 Alpha/beta hydrolase fold-1, IPR029058 Alpha/Beta hydrolase fold FALSE Cluster_3
    cluster82 XANPAGTX0501_005252-T1 IPR001227 Acyl transferase domain superfamily, IPR009081 Phosphopantetheine binding ACP domain, IPR013217 Methyltransferase type 12, IPR013968 Polyketide synthase, ketoreductase domain, IPR014030 Beta-ketoacyl synthase, N-terminal, IPR014031 Beta-ketoacyl synthase, C-terminal, IPR014043 Acyl transferase, IPR016035 Acyl transferase/acyl hydrolase/lysophospholipase, IPR016036 Malonyl-CoA ACP transacylase, ACP-binding, IPR016039 Thiolase-like, IPR018201 Beta-ketoacyl synthase, active site, IPR020801 None, IPR020806 Polyketide synthase, phosphopantetheine-binding domain, IPR020807 Polyketide synthase, dehydratase domain, IPR020841 Polyketide synthase, beta-ketoacyl synthase domain, IPR029063 S-adenosyl-L-methionine-dependent methyltransferase superfamily, IPR032821 Polyketide synthase, C-terminal extension, IPR036291 NAD(P)-binding domain superfamily, IPR036736 ACP-like superfamily, IPR042104 Polyketide synthase, dehydratase domain superfamily FALSE Cluster_2
    cluster82 XANPAGTX0501_005256-T1 IPR006094 FAD linked oxidase, N-terminal, IPR012951 Berberine/berberine-like, IPR016166 FAD-binding domain, PCMH-type, IPR036318 FAD-binding, type PCMH-like superfamily TRUE Cluster_2
    cluster82 XANPAGTX0501_005254-T1 IPR007527 Zinc finger, SWIM-type FALSE Cluster_2
    cluster82 XANPAGTX0501_005265-T1 IPR001283 Cysteine-rich secretory protein-related, IPR014044 CAP domain, IPR018244 Allergen V5/Tpx-1-related, conserved site, IPR035940 CAP superfamily FALSE Cluster_2
    cluster82 XANPAGTX0501_005255-T1 IPR016040 NAD(P)-binding domain, IPR036291 NAD(P)-binding domain superfamily FALSE Cluster_2
    cluster82 XANPAGTX0501_005253-T1 IPR005123 Oxoglutarate/iron-dependent dioxygenase, IPR027443 Isopenicillin N synthase-like superfamily FALSE Cluster_2
    cluster83 XANPAGTX0501_005280-T1 NA FALSE Cluster_3
    cluster83 XANPAGTX0501_005278-T1 NA FALSE NA
    cluster83 XANPAGTX0501_005281-T1 NA FALSE Cluster_3
    cluster83 XANPAGTX0501_005284-T1 NA FALSE Cluster_3
    cluster83 XANPAGTX0501_005271-T1 NA FALSE NA
    cluster83 XANPAGTX0501_005279-T1 NA FALSE Cluster_3
    cluster84 XANPAGTX0501_005361-T1 NA TRUE Cluster_4
    cluster84 XANPAGTX0501_005362-T1 NA FALSE Cluster_4
    cluster84 XANPAGTX0501_005378-T1 IPR002293 Amino acid/polyamine transporter I FALSE Cluster_5
    cluster84 XANPAGTX0501_005364-T1 NA TRUE NA
    cluster84 XANPAGTX0501_005371-T1 NA FALSE NA
    cluster84 XANPAGTX0501_005367-T1 NA FALSE NA
    cluster84 XANPAGTX0501_005368-T1 NA FALSE NA
    cluster84 XANPAGTX0501_005370-T1 NA FALSE NA
    cluster85 XANPAGTX0501_005534-T1 NA FALSE NA
    cluster85 XANPAGTX0501_005531-T1 IPR008949 Isoprenoid synthase domain superfamily FALSE NA
    cluster85 XANPAGTX0501_005529-T1 IPR002110 Ankyrin repeat, IPR020683 Domain of unknown function DUF3447, IPR036770 Ankyrin repeat-containing domain superfamily FALSE NA
    cluster85 XANPAGTX0501_005532-T1 IPR008949 Isoprenoid synthase domain superfamily FALSE NA
    cluster85 XANPAGTX0501_005530-T1 IPR008949 Isoprenoid synthase domain superfamily FALSE NA
    cluster85 XANPAGTX0501_005541-T1 NA FALSE NA
    cluster85 XANPAGTX0501_005533-T1 NA TRUE NA
    cluster86 XANPAGTX0501_005615-T1 NA FALSE NA
    cluster86 XANPAGTX0501_005613-T1 IPR003615 HNH nuclease FALSE NA
    cluster86 XANPAGTX0501_005602-T1 IPR000073 Alpha/beta hydrolase fold-1, IPR000639 Epoxide hydrolase-like, IPR029058 Alpha/Beta hydrolase fold FALSE NA
    cluster86 XANPAGTX0501_005610-T1 NA FALSE NA
    cluster86 XANPAGTX0501_005604-T1 IPR015590 Aldehyde dehydrogenase domain, IPR016161 Aldehyde/histidinol dehydrogenase, IPR016162 Aldehyde dehydrogenase, N-terminal, IPR016163 Aldehyde dehydrogenase, C-terminal, IPR029510 Aldehyde dehydrogenase, glutamic acid active site FALSE NA
    cluster86 XANPAGTX0501_005612-T1 NA FALSE NA
    cluster86 XANPAGTX0501_005611-T1 NA FALSE NA
    cluster86 XANPAGTX0501_005614-T1 NA FALSE NA
    cluster87 XANPAGTX0501_005697-T2 IPR002110 Ankyrin repeat, IPR003593 AAA+ ATPase domain, IPR007111 NACHT nucleoside triphosphatase, IPR020683 Domain of unknown function DUF3447, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR031348 Fungal N-terminal domain of STAND protein, IPR036770 Ankyrin repeat-containing domain superfamily FALSE NA
    cluster87 XANPAGTX0501_005693-T1 NA FALSE NA
    cluster87 XANPAGTX0501_005695-T1 NA FALSE NA
    cluster87 XANPAGTX0501_005694-T1 NA FALSE NA
    cluster87 XANPAGTX0501_005692-T1 IPR000873 AMP-dependent synthetase/ligase domain, IPR009081 Phosphopantetheine binding ACP domain, IPR010071 Amino acid adenylation domain, IPR010080 Thioester reductase-like domain, IPR013120 Fatty acyl-coenzyme A reductase, NAD-binding domain, IPR020845 AMP-binding, conserved site, IPR036291 NAD(P)-binding domain superfamily, IPR036736 ACP-like superfamily, IPR042099 ANL, N-terminal domain FALSE NA
    cluster87 XANPAGTX0501_005699-T1 IPR010730 Heterokaryon incompatibility FALSE NA
    cluster88 XANPAGTX0501_005737-T1 IPR002347 Short-chain dehydrogenase/reductase SDR, IPR036291 NAD(P)-binding domain superfamily FALSE Cluster_1
    cluster88 XANPAGTX0501_005733-T1 NA TRUE Cluster_1
    cluster88 XANPAGTX0501_005735-T1 IPR018824 Conidiation-specific protein 6 FALSE Cluster_1
    cluster88 XANPAGTX0501_005732-T1 IPR001128 Cytochrome P450, IPR002401 Cytochrome P450, E-class, group I, IPR017972 Cytochrome P450, conserved site, IPR036396 Cytochrome P450 superfamily FALSE Cluster_1
    cluster89 XANPAGTX0501_005775-T1 NA FALSE NA
    cluster89 XANPAGTX0501_005766-T1 NA FALSE NA
    cluster89 XANPAGTX0501_005770-T1 IPR002347 Short-chain dehydrogenase/reductase SDR, IPR036291 NAD(P)-binding domain superfamily FALSE NA
    cluster89 XANPAGTX0501_005771-T1 IPR001547 Glycoside hydrolase, family 5, IPR017853 Glycoside hydrolase superfamily TRUE NA
    cluster90 XANPAGTX0501_006137-T1 NA FALSE NA
    cluster90 XANPAGTX0501_006141-T1 NA FALSE NA
    cluster90 XANPAGTX0501_006142-T1 NA TRUE NA
    cluster90 XANPAGTX0501_006143-T1 NA FALSE NA
    cluster91 XANPAGTX0501_006205-T1 NA FALSE NA
    cluster91 XANPAGTX0501_006203-T1 IPR001841 Zinc finger, RING-type, IPR002867 IBR domain, IPR013083 Zinc finger, RING/FYVE/PHD-type, IPR017907 Zinc finger, RING-type, conserved site FALSE NA
    cluster91 XANPAGTX0501_006215-T1 NA FALSE NA
    cluster91 XANPAGTX0501_006214-T1 NA FALSE NA
    cluster91 XANPAGTX0501_006204-T1 NA FALSE NA
    cluster92 XANPAGTX0501_006245-T1 NA TRUE NA
    cluster92 XANPAGTX0501_006239-T1 NA FALSE NA
    cluster92 XANPAGTX0501_006250-T1 NA FALSE NA
    cluster92 XANPAGTX0501_006243-T1 NA FALSE NA
    cluster92 XANPAGTX0501_006256-T1 NA FALSE NA
    cluster92 XANPAGTX0501_006232-T1 IPR002347 Short-chain dehydrogenase/reductase SDR, IPR036291 NAD(P)-binding domain superfamily FALSE NA
    cluster92 XANPAGTX0501_006255-T1 NA FALSE NA
    cluster92 XANPAGTX0501_006233-T1 IPR001128 Cytochrome P450, IPR002403 Cytochrome P450, E-class, group IV, IPR036396 Cytochrome P450 superfamily FALSE NA
    cluster92 XANPAGTX0501_006262-T1 NA FALSE NA
    cluster92 XANPAGTX0501_006253-T1 IPR014849 EKC/KEOPS complex, subunit Gon7 FALSE NA
    cluster92 XANPAGTX0501_006257-T1 IPR000490 Glycoside hydrolase family 17, IPR017853 Glycoside hydrolase superfamily TRUE NA
    cluster92 XANPAGTX0501_006238-T1 NA FALSE NA
    cluster92 XANPAGTX0501_006246-T1 NA FALSE NA
    cluster92 XANPAGTX0501_006237-T1 IPR002347 Short-chain dehydrogenase/reductase SDR, IPR036291 NAD(P)-binding domain superfamily FALSE NA
    croc_c<-read.delim("../analysis_and_temp_files/08_dge_culture_lichen/croc_culture_upregulated.txt",header=F)
    croc_c$cluster<-paste0("cluster",1:nrow(croc_c))
    croc_c<-croc_c %>% separate_longer_delim(V9, delim = " ") %>% filter(V9!="") %>% 
      select(-c(V7,V8,V2,V3))
    colnames(croc_c)<-c("contig","start","end","pval","TranscriptID","cluster")
    croc_c %>% group_by(cluster) %>% summarize(n=n()) %>% arrange(desc(n)) %>% head()
    ## # A tibble: 6 × 2
    ##   cluster       n
    ##   <chr>     <int>
    ## 1 cluster31    13
    ## 2 cluster22    10
    ## 3 cluster27    10
    ## 4 cluster38    10
    ## 5 cluster39     9
    ## 6 cluster43     9

    Only a small fraction if the clusters overlap with BGCs

    croc_l3<-croc_l2 %>% 
       mutate(is_SM = ifelse(is.na(antiSMASH),"not_in_SM_cluster","in_SM_cluster")) %>%
      group_by(cluster,is_SM) %>% summarize(n=n()) %>%
       pivot_wider(values_from = n, values_fill = 0, names_from = is_SM)
    ## `summarise()` has grouped output by 'cluster'. You can override using the
    ## `.groups` argument.
    croc_c2<-croc_c %>% left_join(funannot2) %>% 
      select(cluster,antiSMASH,TranscriptID, InterPro_new,secreted_consensus) 
    ## Joining with `by = join_by(TranscriptID)`
    croc_c3<-croc_c2 %>% 
       mutate(is_SM = ifelse(is.na(antiSMASH),"not_in_SM_cluster","in_SM_cluster")) %>%
      group_by(cluster,is_SM) %>% summarize(n=n()) %>%
       pivot_wider(values_from = n, values_fill = 0, names_from = is_SM)
    ## `summarise()` has grouped output by 'cluster'. You can override using the
    ## `.groups` argument.
    • Out of 92 upregulated in lichens, 24 overlap with SM clusters
    • Out of 49 upregulated in lichens, 10 overlap with SM clusters

    Little overlap with high repeat content regions

    • Translate CROC coordinates into bed format, so it is easier to visualize
    #First, save CROC tables in a bed format
    croc_l<-read.delim2("../analysis_and_temp_files/08_dge_culture_lichen/croc_lichen_upregulated.txt",header=F)
    write.table(croc_l[,c(1,4,5)],"../analysis_and_temp_files/08_dge_culture_lichen/GTX0501_lichen_upregulated_croc.bed",col.names = F,row.names = F,quote = F,sep="\t")
    
    croc_c<-read.delim2("../analysis_and_temp_files/08_dge_culture_lichen/croc_culture_upregulated.txt",header=F)
    write.table(croc_c[,c(1,4,5)],"../analysis_and_temp_files/08_dge_culture_lichen/GTX0501_culture_upregulated_croc.bed",col.names = F,row.names = F,quote = F,sep="\t")
    
    #intersec CROC file with the bed file
    sbatch --mem=10G -c 5 --partition=tsl-short --wrap="source package b0ed0698-358b-4c9b-9d21-603ea8d6e478; bedtools coverage -a ../02_mycobiont_genome/analysis_and_temp_files/06_annotate_lecanoro/GTX0501_xanthoria_nuclear_mag_clean_sort.bed -b analysis_and_temp_files/08_dge_culture_lichen/GTX0501_lichen_upregulated_croc.bed > analysis_and_temp_files/08_dge_culture_lichen/GTX0501_xanthoria_nuclear_mag_clean_sort_lichen_upregulated_croc_bedtools.txt"
    
    sbatch --mem=10G -c 5 --partition=tsl-short --wrap="source package b0ed0698-358b-4c9b-9d21-603ea8d6e478; bedtools coverage -a ../02_mycobiont_genome/analysis_and_temp_files/06_annotate_lecanoro/GTX0501_xanthoria_nuclear_mag_clean_sort.bed -b analysis_and_temp_files/08_dge_culture_lichen/GTX0501_culture_upregulated_croc.bed > analysis_and_temp_files/08_dge_culture_lichen/GTX0501_xanthoria_nuclear_mag_clean_sort_culture_upregulated_croc_bedtools.txt"
    • Plot separately
    #read in separately
    croc_l<-read.delim2("../analysis_and_temp_files/08_dge_culture_lichen/GTX0501_xanthoria_nuclear_mag_clean_sort_lichen_upregulated_croc_bedtools.txt",header=F)[,c(1,2,7)]
    colnames(croc_l)<-c("contig","window_start","croc")
    croc_l$croc<-as.numeric(croc_l$croc)
    croc_l<-croc_l %>% mutate(croc_binary_l=if_else(croc>0,1,0))
    
    croc_c<-read.delim2("../analysis_and_temp_files/08_dge_culture_lichen/GTX0501_xanthoria_nuclear_mag_clean_sort_culture_upregulated_croc_bedtools.txt",header=F)[,c(1,2,7)]
    colnames(croc_c)<-c("contig","window_start","croc")
    croc_c$croc<-as.numeric(croc_c$croc)
    croc_c<-croc_c %>% mutate(croc_binary_c=if_else(croc>0,1,0))
    • Plot lichen upregulated
    ggplot(croc_l,aes(y=fct_reorder(contig,window_start),x=window_start))+
        geom_tile(aes(height=0.1),fill="black")+
      geom_tile(aes(fill=croc_binary_l,alpha=croc_binary_l))+
      xlab("")+ylab("")+
      scale_alpha_continuous(range=c(0,1))+
      scale_fill_gradient(low="#f0f0f0",high="#F8766D")+
       scale_x_continuous(breaks = c(0,1000000,2000000,3000000,4000000),
                         labels = c("0","1 Mbp","2 Mbp","3 Mbp","4 Mbp"))+
      theme_minimal()+
      theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

    * Plot culture upregulated

    ggplot(croc_c,aes(y=fct_reorder(contig,window_start),x=window_start))+
      geom_tile(aes(height=0.1),fill="black")+
        geom_tile(aes(fill=croc_binary_c,alpha=croc_binary_c))+
      xlab("")+ylab("")+
      scale_alpha_continuous(range=c(0,1))+
      scale_fill_gradient(low="#f0f0f0",high="#619CFF")+
       scale_x_continuous(breaks = c(0,1000000,2000000,3000000,4000000),
                         labels = c("0","1 Mbp","2 Mbp","3 Mbp","4 Mbp"))+
      theme_minimal()+
      theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

    • Plot together with repeat content
    library(ggnewscale)
    library(patchwork)
    rep<-read.delim2("../../02_mycobiont_genome/analysis_and_temp_files/06_annotate_lecanoro/GTX0501_xanthoria_nuclear_mag_clean_sort_repeatcov.txt",header=F)[,c(1,2,7)]
    colnames(rep)<-c("contig","window_start","repeat_content")
    rep$repeat_content<- rep$repeat_content %>% as.numeric()
    
    df2 <- rep %>% left_join(croc_l %>% select(-croc)) %>% 
      left_join(croc_c %>% select(-croc)) %>%
      pivot_longer(-c(contig, window_start),names_to = "parameter",values_to = "value") 
    ## Joining with `by = join_by(contig, window_start)`
    ## Joining with `by = join_by(contig, window_start)`
    df2$parameter<-plyr::revalue(df2$parameter, c( "repeat_content"="CRepeat%","croc_binary_l"="BLichen_upregulated",
            "croc_binary_c"="ACulture_upregulated"))
    df2$contig<-reorder(df2$contig,df2$contig,FUN=function(x) -length(x))
    
    df2<-df2 %>% mutate(panel=case_when(
      contig %in% c("Xp_GTX0501_1","Xp_GTX0501_2","Xp_GTX0501_3",  "Xp_GTX0501_4", 
    "Xp_GTX0501_5",  "Xp_GTX0501_6",  "Xp_GTX0501_7",  "Xp_GTX0501_8", 
    "Xp_GTX0501_9",  "Xp_GTX0501_10", "Xp_GTX0501_11", "Xp_GTX0501_12",
    "Xp_GTX0501_13", "Xp_GTX0501_14", "Xp_GTX0501_15", "Xp_GTX0501_16",
    "Xp_GTX0501_17", "Xp_GTX0501_18", "Xp_GTX0501_19") ~ 1,
    contig %in% c("Xp_GTX0501_19", "Xp_GTX0501_20","Xp_GTX0501_21", "Xp_GTX0501_22", "Xp_GTX0501_23", "Xp_GTX0501_24","Xp_GTX0501_25", "Xp_GTX0501_26", "Xp_GTX0501_27", "Xp_GTX0501_28","Xp_GTX0501_29", "Xp_GTX0501_30", "Xp_GTX0501_31", "Xp_GTX0501_32",
    "Xp_GTX0501_33", "Xp_GTX0501_34", "Xp_GTX0501_35", "Xp_GTX0501_36", "Xp_GTX0501_37", "Xp_GTX0501_38") ~ 2, T ~3))
    
    gg1<-df2 %>% filter(panel==1) |> 
      ggplot() +
      # apply the gradient to group 1
    geom_tile(aes(y=parameter,x=window_start,fill=value), 
        filter(df2, parameter == "CRepeat%",panel==1)) +
     scale_fill_gradientn(colours = c("purple", "orange"),guide = 'none')+
      labs(fill = "Repeat%") +
      # start a new scale
    new_scale_fill() +
      geom_tile(aes(y=parameter,x=window_start,height=0.25),fill="black", filter(df2, parameter == "BLichen_upregulated",panel==1))+
        geom_tile(aes(y=parameter,x=window_start,fill=value,alpha=value), filter(df2, parameter == "BLichen_upregulated",panel==1))+
      scale_alpha_continuous(range=c(0,1),guide = 'none')+
      scale_fill_gradientn(colours = c("white","#F8766D"),guide = 'none')+
      # start a new scale
      new_scale_fill() +
      geom_tile(aes(y=parameter,x=window_start,height=0.25),fill="black", filter(df2, parameter == "ACulture_upregulated",panel==1))+
        geom_tile(aes(y=parameter,x=window_start,fill=value,alpha=value), filter(df2, parameter == "ACulture_upregulated",panel==1))+
      scale_alpha_continuous(range=c(0,1),guide = 'none')+
      scale_fill_gradientn(colours = c("white","#619CFF"),guide = 'none')+
      scale_x_continuous(breaks = c(0,1000000,2000000,3000000,4000000),
                         labels = c("0","1 Mbp","2 Mbp","3 Mbp","4 Mbp"),expand = c(0, 0))+
      labs(fill = "Culture-upregulated clusters") +  xlab("")+ylab("Contigs")+
      facet_wrap(~contig,ncol=1)+
      theme_classic()+
      theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
            strip.background = element_blank(),strip.text.x = element_blank(),
            axis.text.y = element_blank(),legend.position="bottom",
            axis.line.y = element_blank(),axis.ticks.y = element_blank())
    ## Scale for alpha is already present.
    ## Adding another scale for alpha, which will replace the existing scale.
    gg2<-df2 %>% filter(panel==2) |> 
      ggplot() +
      # apply the gradient to group 1
    geom_tile(aes(y=parameter,x=window_start,fill=value), 
        filter(df2, parameter == "CRepeat%",panel==2)) +
     scale_fill_gradientn(colours = c("purple", "orange"),guide = 'none')+
      labs(fill = "Repeat%") +
      # start a new scale
    new_scale_fill() +
      geom_tile(aes(y=parameter,x=window_start,height=0.25),fill="black", filter(df2, parameter == "BLichen_upregulated",panel==2))+
        geom_tile(aes(y=parameter,x=window_start,fill=value,alpha=value), filter(df2, parameter == "BLichen_upregulated",panel==2))+
      scale_alpha_continuous(range=c(0,1),guide = 'none')+
      scale_fill_gradientn(colours = c("white","#F8766D"),guide = 'none')+
      # start a new scale
      new_scale_fill() +
      geom_tile(aes(y=parameter,x=window_start,height=0.25),fill="black", filter(df2, parameter == "ACulture_upregulated",panel==2))+
        geom_tile(aes(y=parameter,x=window_start,fill=value,alpha=value), filter(df2, parameter == "ACulture_upregulated",panel==2))+
      scale_alpha_continuous(range=c(0,1),guide = 'none')+
      scale_fill_gradientn(colours = c("white","#619CFF"),guide = 'none')+
      scale_x_continuous(breaks = c(0,1000000,2000000,3000000,4000000),
                         labels = c("0","1 Mbp","2 Mbp","3 Mbp","4 Mbp"),expand = c(0, 0))+
      labs(fill = "Culture-upregulated clusters") +  xlab("")+ylab("Contigs")+
      facet_wrap(~contig,ncol=1)+
      theme_classic()+
      theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
            strip.background = element_blank(),strip.text.x = element_blank(),
            axis.text.y = element_blank(),legend.position="bottom",
            axis.line.y = element_blank(),axis.ticks.y = element_blank())
    ## Scale for alpha is already present.
    ## Adding another scale for alpha, which will replace the existing scale.
    gg3<-df2 %>% filter(panel==3) |> 
      ggplot() +
      # apply the gradient to group 1
    geom_tile(aes(y=parameter,x=window_start,fill=value), 
        filter(df2, parameter == "CRepeat%",panel==3)) +
     scale_fill_gradientn(colours = c("purple", "orange"),guide = 'none')+
      labs(fill = "Repeat%") +
      # start a new scale
    new_scale_fill() +
      geom_tile(aes(y=parameter,x=window_start,height=0.25),fill="black", filter(df2, parameter == "BLichen_upregulated",panel==3))+
        geom_tile(aes(y=parameter,x=window_start,fill=value,alpha=value), filter(df2, parameter == "BLichen_upregulated",panel==3))+
      scale_alpha_continuous(range=c(0,1),guide = 'none')+
      scale_fill_gradientn(colours = c("white","#F8766D"),guide = 'none')+
      # start a new scale
      new_scale_fill() +
      geom_tile(aes(y=parameter,x=window_start,height=0.25),fill="black", filter(df2, parameter == "ACulture_upregulated",panel==3))+
        geom_tile(aes(y=parameter,x=window_start,fill=value,alpha=value), filter(df2, parameter == "ACulture_upregulated",panel==3))+
      scale_alpha_continuous(range=c(0,1),guide = 'none')+
      scale_fill_gradientn(colours = c("white","#619CFF"),guide = 'none')+
      scale_x_continuous(breaks = c(0,1000000,2000000,3000000,4000000),
                         labels = c("0","1 Mbp","2 Mbp","3 Mbp","4 Mbp"),expand = c(0, 0))+
      labs(fill = "Culture-upregulated clusters") +  xlab("")+ylab("Contigs")+
      facet_wrap(~contig,ncol=1)+
      theme_classic()+
      theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
            strip.background = element_blank(),strip.text.x = element_blank(),
            axis.text.y = element_blank(),legend.position="bottom",
            axis.line.y = element_blank(),axis.ticks.y = element_blank())
    ## Scale for alpha is already present.
    ## Adding another scale for alpha, which will replace the existing scale.
    #scale graph widths
    #max(df$window_start[df$panel==1]) / max(df$window_start[df$panel==3]) [1] 29.44444
    #max(df$window_start[df$panel==2]) / max(df$window_start[df$panel==3]) [1] 3.666667
    gg1+gg2+gg3 + plot_layout(guides = "collect",widths = c(29.4, 3.7, 1)) & theme(legend.position = 'bottom')

    * Save figure

    plot<-gg1+theme(axis.text = element_text(size=6),
              axis.title = element_text(size=7),
              legend.text = element_text(size=7),
              legend.title = element_text(size=8))+
      gg2+theme(axis.text = element_text(size=6),
              axis.title = element_text(size=7),
              legend.text = element_text(size=7),
              legend.title = element_text(size=8))+
      gg3 +theme(axis.text = element_text(size=6),
              axis.title = element_text(size=7),
              legend.text = element_text(size=7),
              legend.title = element_text(size=8))+
      plot_layout(guides = "collect",widths = c(29.4, 3.7, 1)) & theme(legend.position = 'bottom')
    ggsave('../results/croc_clusters_vs_repeats.pdf',plot, width = 5, height = 5)
    • How many clusters overlap with high-repeat content regions? to chose the cut-off, made a histogram
    ggplot(rep,aes(x=repeat_content))+geom_histogram()
    ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

    • Will go with 0.5 cutoff
    • 8 (9%) lichen upregulated clusters overlap with high repeat content regions
    library(GenomicRanges)
    ## Loading required package: stats4
    ## Loading required package: BiocGenerics
    ## 
    ## Attaching package: 'BiocGenerics'
    ## The following object is masked from 'package:limma':
    ## 
    ##     plotMA
    ## The following objects are masked from 'package:lubridate':
    ## 
    ##     intersect, setdiff, union
    ## The following objects are masked from 'package:dplyr':
    ## 
    ##     combine, intersect, setdiff, union
    ## The following objects are masked from 'package:stats':
    ## 
    ##     IQR, mad, sd, var, xtabs
    ## The following objects are masked from 'package:base':
    ## 
    ##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
    ##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
    ##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
    ##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
    ##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
    ##     union, unique, unsplit, which.max, which.min
    ## Loading required package: S4Vectors
    ## Warning: package 'S4Vectors' was built under R version 4.1.3
    ## 
    ## Attaching package: 'S4Vectors'
    ## The following object is masked from 'package:gplots':
    ## 
    ##     space
    ## The following objects are masked from 'package:lubridate':
    ## 
    ##     second, second<-
    ## The following objects are masked from 'package:dplyr':
    ## 
    ##     first, rename
    ## The following object is masked from 'package:tidyr':
    ## 
    ##     expand
    ## The following objects are masked from 'package:base':
    ## 
    ##     expand.grid, I, unname
    ## Loading required package: IRanges
    ## 
    ## Attaching package: 'IRanges'
    ## The following object is masked from 'package:lubridate':
    ## 
    ##     %within%
    ## The following objects are masked from 'package:dplyr':
    ## 
    ##     collapse, desc, slice
    ## The following object is masked from 'package:purrr':
    ## 
    ##     reduce
    ## Loading required package: GenomeInfoDb
    croc_l<-read.delim2("../../03_transcriptomic_analysis/analysis_and_temp_files/08_dge_culture_lichen/croc_lichen_upregulated.txt",header=F)
    croc_c<-read.delim2("../../03_transcriptomic_analysis/analysis_and_temp_files/08_dge_culture_lichen/croc_culture_upregulated.txt",header=F)
    
    repeat_range <- GRanges(seqnames=rep$contig,ranges=IRanges(rep$window_start, rep$window_start+9999),repeat_content = rep$repeat_content)
    
    croc_l_range <-  GRanges(seqnames=croc_l$V1,ranges=IRanges(croc_l$V4, croc_l$V5),genes=croc_l$V9)
    croc_c_range <-  GRanges(seqnames=croc_c$V1,ranges=IRanges(croc_c$V4, croc_c$V5),genes=croc_c$V9)
    
    repeat_croc_l <- repeat_range[countOverlaps(repeat_range, croc_l_range ) > 0]
    repeat_croc_l2 <- repeat_croc_l[repeat_croc_l$repeat_content > 0.5]
    
    croc_l_range[countOverlaps(croc_l_range,repeat_croc_l2) > 0]
    ## GRanges object with 8 ranges and 1 metadata column:
    ##            seqnames          ranges strand |                  genes
    ##               <Rle>       <IRanges>  <Rle> |            <character>
    ##   [1] Xp_GTX0501_12     19342-42764      * | XANPAGTX0501_007271-..
    ##   [2] Xp_GTX0501_14   137017-141122      * | XANPAGTX0501_008007-..
    ##   [3] Xp_GTX0501_18   366441-424180      * | XANPAGTX0501_009046-..
    ##   [4]  Xp_GTX0501_2   399739-424931      * | XANPAGTX0501_001015-..
    ##   [5]  Xp_GTX0501_2 2298121-2324687      * | XANPAGTX0501_001710-..
    ##   [6] Xp_GTX0501_21   183347-198083      * | XANPAGTX0501_009435-..
    ##   [7] Xp_GTX0501_30     15022-50184      * | XANPAGTX0501_010180-..
    ##   [8]  Xp_GTX0501_8 1503462-1527344      * | XANPAGTX0501_005697-..
    ##   -------
    ##   seqinfo: 33 sequences from an unspecified genome; no seqlengths
    • 2 culture-upregulated clusters (4%) overlap with high repeat content regions
    library(GenomicRanges)
    repeat_croc_c <- repeat_range[countOverlaps(repeat_range, croc_c_range ) > 0]
    repeat_croc_c2 <- repeat_croc_l[repeat_croc_c$repeat_content > 0.5]
    
    croc_c_range[countOverlaps(croc_c_range,repeat_croc_c2) > 0]
    ## GRanges object with 2 ranges and 1 metadata column:
    ##            seqnames        ranges strand |                  genes
    ##               <Rle>     <IRanges>  <Rle> |            <character>
    ##   [1] Xp_GTX0501_13 711348-737772      * | XANPAGTX0501_007924-..
    ##   [2]  Xp_GTX0501_5  27829-113271      * | XANPAGTX0501_003202-..
    ##   -------
    ##   seqinfo: 26 sequences from an unspecified genome; no seqlengths